From: Michael Liao Date: Thu, 4 Dec 2014 05:20:33 +0000 (+0000) Subject: [X86] Clean up whitespace as well as minor coding style X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=d3c452a5062b6336e0cf4d8ef220e1ffed49da60 [X86] Clean up whitespace as well as minor coding style git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223339 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 65ceb620f0a..b2754e6955a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -86,7 +86,7 @@ private: typedef std::pair< InfixCalculatorTok, int64_t > ICToken; SmallVector InfixOperatorStack; SmallVector PostfixStack; - + public: int64_t popOperand() { assert (!PostfixStack.empty() && "Poped an empty stack!"); @@ -100,7 +100,7 @@ private: "Unexpected operand!"); PostfixStack.push_back(std::make_pair(Op, Val)); } - + void popOperator() { InfixOperatorStack.pop_back(); } void pushOperator(InfixCalculatorTok Op) { // Push the new operator if the stack is empty. @@ -108,7 +108,7 @@ private: InfixOperatorStack.push_back(Op); return; } - + // Push the new operator if it has a higher precedence than the operator // on the top of the stack or the operator on the top of the stack is a // left parentheses. @@ -118,7 +118,7 @@ private: InfixOperatorStack.push_back(Op); return; } - + // The operator on the top of the stack has higher precedence than the // new operator. unsigned ParenCount = 0; @@ -126,17 +126,17 @@ private: // Nothing to process. if (InfixOperatorStack.empty()) break; - + Idx = InfixOperatorStack.size() - 1; StackOp = InfixOperatorStack[Idx]; if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) break; - + // If we have an even parentheses count and we see a left parentheses, // then stop processing. if (!ParenCount && StackOp == IC_LPAREN) break; - + if (StackOp == IC_RPAREN) { ++ParenCount; InfixOperatorStack.pop_back(); @@ -158,10 +158,10 @@ private: if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) PostfixStack.push_back(std::make_pair(StackOp, 0)); } - + if (PostfixStack.empty()) return 0; - + SmallVector OperandStack; for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { ICToken Op = PostfixStack[i]; @@ -263,7 +263,7 @@ private: State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), AddImmPrefix(addimmprefix) { Info.clear(); } - + unsigned getBaseReg() { return BaseReg; } unsigned getIndexReg() { return IndexReg; } unsigned getScale() { return Scale; } @@ -1103,7 +1103,7 @@ RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, (*I).Kind = AOK_Delete; } const char *SymLocPtr = SymName.data(); - // Skip everything before the symbol. + // Skip everything before the symbol. if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { assert(Len > 0 && "Expected a non-negative length."); AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); @@ -1128,7 +1128,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { // identifier. Don't try an parse it as a register. if (Tok.getString().startswith(".")) break; - + // If we're parsing an immediate expression, we don't expect a '['. if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; @@ -1194,7 +1194,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { MCSymbol *Sym = getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b"); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - const MCExpr *Val = + const MCExpr *Val = MCSymbolRefExpr::Create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); @@ -1279,7 +1279,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, const MCExpr *NewDisp; if (ParseIntelDotOperator(Disp, NewDisp)) return nullptr; - + End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5e8c2d609f6..faba02472c8 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -51,8 +51,8 @@ const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); -namespace llvm { - +namespace llvm { + // Fill-ins to make the compiler happy. These constants are never actually // assigned; they are just filler to make an automatically-generated switch // statement work. @@ -127,11 +127,11 @@ static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { static void logger(void* arg, const char* log) { if (!arg) return; - + raw_ostream &vStream = *(static_cast(arg)); vStream << log << "\n"; -} - +} + // // Public interface for the disassembler // @@ -184,7 +184,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) { } /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the -/// immediate Value in the MCInst. +/// immediate Value in the MCInst. /// /// @param Value - The immediate Value, has had any PC adjustment made by /// the caller. @@ -196,7 +196,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was /// called then that function is called to get any symbolic information for the /// immediate in the instruction using the Address, Offset and Width. If that -/// returns non-zero then the symbolic information it returns is used to create +/// returns non-zero then the symbolic information it returns is used to create /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() /// returns zero and isBranch is true then a symbol look up for immediate Value /// is done and if a symbol is found an MCExpr is created with that, else @@ -204,8 +204,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// if it adds an operand to the MCInst and false otherwise. static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, - uint64_t Width, MCInst &MI, - const MCDisassembler *Dis) { + uint64_t Width, MCInst &MI, + const MCDisassembler *Dis) { return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, Width); } @@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, /// These can often be addresses in a literal pool. The Address of the /// instruction and its immediate Value are used to determine the address /// being referenced in the literal pool entry. The SymbolLookUp call back will -/// return a pointer to a literal 'C' string if the referenced address is an +/// return a pointer to a literal 'C' string if the referenced address is an /// address into a section with 'C' string literals. static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder) { @@ -287,7 +287,7 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { // Sign-extend the immediate if necessary. OperandType type = (OperandType)operand.type; @@ -407,7 +407,7 @@ static bool translateRMRegister(MCInst &mcInst, debug("A R/M register operand may not have a SIB byte"); return true; } - + switch (insn.eaBase) { default: debug("Unexpected EA base register"); @@ -427,7 +427,7 @@ static bool translateRMRegister(MCInst &mcInst, ALL_REGS #undef ENTRY } - + return false; } @@ -440,26 +440,26 @@ static bool translateRMRegister(MCInst &mcInst, /// from. /// @return - 0 on success; nonzero otherwise static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { // Addresses in an MCInst are represented as five operands: - // 1. basereg (register) The R/M base, or (if there is a SIB) the + // 1. basereg (register) The R/M base, or (if there is a SIB) the // SIB base - // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified + // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified // scale amount // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) - // the index (which is multiplied by the + // the index (which is multiplied by the // scale amount) // 4. displacement (immediate) 0, or the displacement if there is one // 5. segmentreg (register) x86_registerNONE for now, but could be set // if we have segment overrides - + MCOperand baseReg; MCOperand scaleAmount; MCOperand indexReg; MCOperand displacement; MCOperand segmentReg; uint64_t pcrel = 0; - + if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { if (insn.sibBase != SIB_BASE_NONE) { switch (insn.sibBase) { @@ -512,7 +512,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; - insn.sibIndex = (SIBIndex)(IndexBase + + insn.sibIndex = (SIBIndex)(IndexBase + (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); } @@ -534,7 +534,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else { indexReg = MCOperand::CreateReg(0); } - + scaleAmount = MCOperand::CreateImm(insn.sibScale); } else { switch (insn.eaBase) { @@ -553,7 +553,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else baseReg = MCOperand::CreateReg(0); - + indexReg = MCOperand::CreateReg(0); break; case EA_BASE_BX_SI: @@ -584,7 +584,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, // placeholders to keep the compiler happy. #define ENTRY(x) \ case EA_BASE_##x: \ - baseReg = MCOperand::CreateReg(X86::x); break; + baseReg = MCOperand::CreateReg(X86::x); break; ALL_EA_BASES #undef ENTRY #define ENTRY(x) case EA_REG_##x: @@ -595,14 +595,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, return true; } } - + scaleAmount = MCOperand::CreateImm(1); } - + displacement = MCOperand::CreateImm(insn.displacement); segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); - + mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); @@ -623,7 +623,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, /// from. /// @return - 0 on success; nonzero otherwise static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn, const MCDisassembler *Dis) { + InternalInstruction &insn, const MCDisassembler *Dis) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -670,7 +670,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, return translateRMMemory(mcInst, insn, Dis); } } - + /// translateFPRegister - Translates a stack position on the FPU stack to its /// LLVM form, and appends it to an MCInst. /// @@ -698,7 +698,7 @@ static bool translateMaskRegister(MCInst &mcInst, return false; } -/// translateOperand - Translates an operand stored in an internal instruction +/// translateOperand - Translates an operand stored in an internal instruction /// to LLVM's format and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. @@ -707,7 +707,7 @@ static bool translateMaskRegister(MCInst &mcInst, /// @return - false on success; true otherwise. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); @@ -761,7 +761,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, insn, Dis); } } - + /// translateInstruction - Translates an internal instruction and all its /// operands to an MCInst. /// @@ -770,12 +770,12 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, /// @return - false on success; true otherwise. static bool translateInstruction(MCInst &mcInst, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { if (!insn.spec) { debug("Instruction has no specification"); return true; } - + mcInst.setOpcode(insn.instructionID); // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 // prefix bytes should be disassembled as xrelease and xacquire then set the @@ -786,9 +786,9 @@ static bool translateInstruction(MCInst &mcInst, else if(mcInst.getOpcode() == X86::REPNE_PREFIX) mcInst.setOpcode(X86::XACQUIRE_PREFIX); } - + insn.numImmediatesTranslated = 0; - + for (const auto &Op : insn.operands) { if (Op.encoding != ENCODING_NONE) { if (translateOperand(mcInst, Op, insn, Dis)) { @@ -796,7 +796,7 @@ static bool translateInstruction(MCInst &mcInst, } } } - + return false; } @@ -807,9 +807,9 @@ static MCDisassembler *createX86Disassembler(const Target &T, return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); } -extern "C" void LLVMInitializeX86Disassembler() { +extern "C" void LLVMInitializeX86Disassembler() { // Register the disassembler. - TargetRegistry::RegisterMCDisassembler(TheX86_32Target, + TargetRegistry::RegisterMCDisassembler(TheX86_32Target, createX86Disassembler); TargetRegistry::RegisterMCDisassembler(TheX86_64Target, createX86Disassembler); diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index b72730cef52..643db967e56 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -163,8 +163,7 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, int64_t Address; if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { O << formatHex((uint64_t)Address); - } - else { + } else { // Otherwise, just print the expression. O << *Op.getExpr(); } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 41be14b1b15..245e78face4 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -56,7 +56,7 @@ public: void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } - + void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } @@ -137,7 +137,7 @@ public: private: bool HasCustomInstComment; }; - + } #endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 1c8466bf294..79e4331f74d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -168,21 +168,21 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp); const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg); - + // If this has a segment register, print it. if (SegReg.getReg()) { printOperand(MI, Op+X86::AddrSegmentReg, O); O << ':'; } - + O << '['; - + bool NeedPlus = false; if (BaseReg.getReg()) { printOperand(MI, Op+X86::AddrBaseReg, O); NeedPlus = true; } - + if (IndexReg.getReg()) { if (NeedPlus) O << " + "; if (ScaleVal != 1) @@ -209,7 +209,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << formatImm(DispVal); } } - + O << ']'; } diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index d082f0bd670..b97f152f86a 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -48,7 +48,7 @@ public: O << "opaque ptr "; printMemReference(MI, OpNo, O); } - + void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "byte ptr "; printMemReference(MI, OpNo, O); @@ -152,7 +152,7 @@ public: printMemOffset(MI, OpNo, O); } }; - + } #endif diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index befa6c29337..60e223e5bd1 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -512,7 +512,7 @@ protected: // Defines a new offset for the CFA. E.g. // // With frame: - // + // // pushq %rbp // L0: // .cfi_def_cfa_offset 16 diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 365cf0ce156..5d6bc5ff7a0 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -759,7 +759,7 @@ namespace X86II { (RegNo > X86::ZMM15 && RegNo <= X86::ZMM31)); } - + inline bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 31b8e2da8e4..976df94a786 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -721,7 +721,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MemAddr, src1(VEX_4V), src2(ModR/M) // MemAddr, src1(ModR/M), imm8 // - if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand + + if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand + X86::AddrBaseReg).getReg())) VEX_B = 0x0; if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand + @@ -863,7 +863,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; } EncodeRC = true; - } + } break; case X86II::MRMDestReg: // MRMDestReg instructions forms: diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index aef957136f5..d8320b97736 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -40,8 +40,8 @@ namespace DWARFFlavour { enum { X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2 }; -} - +} + /// N86 namespace - Native X86 register numbers /// namespace N86 { diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 5685a7fde14..67b0c890085 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -184,7 +184,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (A->isUndefined() || B->isUndefined()) { StringRef Name = A->isUndefined() ? A->getName() : B->getName(); Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation with subtraction expression, symbol '" + + "unsupported relocation with subtraction expression, symbol '" + Name + "' can not be undefined in a subtraction expression"); } @@ -196,8 +196,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (A_Base) { Index = A_Base->getIndex(); IsExtern = 1; - } - else { + } else { Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } @@ -215,8 +214,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, if (B_Base) { Index = B_Base->getIndex(); IsExtern = 1; - } - else { + } else { Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; } diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp index 1ea8798e316..fceb083b5f2 100644 --- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp +++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -13,7 +13,7 @@ using namespace llvm; Target llvm::TheX86_32Target, llvm::TheX86_64Target; -extern "C" void LLVMInitializeX86TargetInfo() { +extern "C" void LLVMInitializeX86TargetInfo() { RegisterTarget X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above"); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8fcc85b4dd6..49ae8b4fe3d 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -331,7 +331,7 @@ def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, +def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; @@ -409,7 +409,7 @@ def : Proc<"c3-2", [FeatureSSE1]>; // be good for modern chips without enabling instruction set encodings past the // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and // modern 64-bit x86 chip, and enables features that are generally beneficial. -// +// // We currently use the Sandy Bridge model as the default scheduling model as // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which // covers a huge swath of x86 processors. If there are specific scheduling diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index d43d1bce177..ed3a0a5939c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -137,7 +137,7 @@ private: unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT); unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT); + unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT); unsigned fastMaterializeConstant(const Constant *C) override; unsigned fastMaterializeAlloca(const AllocaInst *C) override; @@ -544,7 +544,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Ok, we need to do a load from a stub. If we've already loaded from // this stub, reuse the loaded pointer, otherwise emit the load now. - DenseMap::iterator I = LocalValueMap.find(V); + DenseMap::iterator I = LocalValueMap.find(V); unsigned LoadReg; if (I != LocalValueMap.end() && I->second != 0) { LoadReg = I->second; @@ -655,7 +655,7 @@ redo_gep: case Instruction::Alloca: { // Do static allocas. const AllocaInst *A = cast(V); - DenseMap::iterator SI = + DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(A); if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; @@ -903,7 +903,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { unsigned Alignment = S->getAlignment(); unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 + if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = ABIAlignment; bool Aligned = Alignment >= ABIAlignment; @@ -1009,12 +1009,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Make the copy. unsigned DstReg = VA.getLocReg(); - const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), - DstReg).addReg(SrcReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); // Add register to return instruction. RetRegs.push_back(VA.getLocReg()); @@ -1030,14 +1030,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), - RetReg).addReg(Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetReg).addReg(Reg); RetRegs.push_back(RetReg); } // Now emit the RET. MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; @@ -1284,7 +1285,6 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { return true; } - bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. @@ -1632,8 +1632,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { TII.get(X86::MOV32r0), Zero32); // Copy the zero into the appropriate sub/super/identical physical - // register. Unfortunately the operations needed are not uniform enough to - // fit neatly into the table above. + // register. Unfortunately the operations needed are not uniform enough + // to fit neatly into the table above. if (VT.SimpleTy == MVT::i16) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), TypeEntry.HighInReg) @@ -1741,7 +1741,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { EVT CmpVT = TLI.getValueType(CmpLHS->getType()); // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) - return false; + return false; if (SETFOpc) { unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); @@ -1820,7 +1820,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { if (I->getType() != CI->getOperand(0)->getType() || !((Subtarget->hasSSE1() && RetVT == MVT::f32) || - (Subtarget->hasSSE2() && RetVT == MVT::f64) )) + (Subtarget->hasSSE2() && RetVT == MVT::f64))) return false; const Value *CmpLHS = CI->getOperand(0); @@ -2068,8 +2068,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), - CopyReg).addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); InputReg = CopyReg; } @@ -2106,9 +2106,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, VT = MVT::i32; else if (Len >= 2) VT = MVT::i16; - else { + else VT = MVT::i8; - } unsigned Reg; bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); @@ -2528,7 +2527,7 @@ bool X86FastISel::fastLowerArguments() { if (!Subtarget->is64Bit()) return false; - + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. unsigned GPRCnt = 0; unsigned FPRCnt = 0; @@ -3324,7 +3323,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, if (!X86SelectAddress(Ptr, AM)) return false; - const X86InstrInfo &XII = (const X86InstrInfo&)TII; + const X86InstrInfo &XII = (const X86InstrInfo &)TII; unsigned Size = DL.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index b6e2d932d81..a8a381d63ef 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -518,7 +518,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMacho()); - + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the @@ -573,7 +573,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. - if (X86FI->getRestoreBasePointer()) + if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; if (RegInfo->needsStackRealignment(MF)) { // Callee-saved registers are pushed on stack before the stack @@ -1152,7 +1152,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Does not include any dynamic realign. - const uint64_t StackSize = MFI->getStackSize(); + const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG const X86RegisterInfo *RegInfo = @@ -1167,7 +1167,7 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F // refer to arguments to the function which are stored in the *callers* // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. - + assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case"); // We don't handle tail calls, and shouldn't be seeing them diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3ef7b2c7697..7f177bda9ae 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2504,7 +2504,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); + SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); switch (NVT.SimpleTy) { case MVT::i16: ClrNode = diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e1423fa21ce..c21e6146115 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -248,7 +248,7 @@ void X86TargetLowering::resetOperationActions() { // Bypass expensive divides on Atom when compiling with O2. if (TM.getOptLevel() >= CodeGenOpt::Default) { - if (Subtarget->hasSlowDivide32()) + if (Subtarget->hasSlowDivide32()) addBypassSlowDiv(32, 8); if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit()) addBypassSlowDiv(64, 16); @@ -2003,7 +2003,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && - "Unexpected FP-extend for return value."); + "Unexpected FP-extend for return value."); // If this is x86-64, and we disabled SSE, we can't return FP values, // or SSE or MMX vectors. @@ -3492,7 +3492,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // In PIC we need an extra register to formulate the address computation // for the callee. unsigned MaxInRegs = - (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3; + (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -5815,7 +5815,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) { if (Zeroable[i]) continue; - + SDValue Current = Op->getOperand(i); SDValue SrcVector = Current->getOperand(0); if (!V1.getNode()) @@ -6340,8 +6340,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { AllContants = false; NonConstIdx = idx; NumNonConsts++; - } - else { + } else { NumConsts++; if (cast(In)->getZExtValue()) Immediate |= (1ULL << idx); @@ -6364,7 +6363,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { MVT::getIntegerVT(VT.getSizeInBits())); DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm); } - else + else DstVec = DAG.getUNDEF(VT); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, Op.getOperand(NonConstIdx), @@ -6387,7 +6386,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { /// \brief Return true if \p N implements a horizontal binop and return the /// operands for the horizontal binop into V0 and V1. -/// +/// /// This is a helper function of PerformBUILD_VECTORCombine. /// This function checks that the build_vector \p N in input implements a /// horizontal operation. Parameter \p Opcode defines the kind of horizontal @@ -6408,7 +6407,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && "Invalid Vector in input!"); - + bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); bool CanFold = true; unsigned ExpectedVExtractIdx = BaseIdx; @@ -6477,13 +6476,13 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, } /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by -/// a concat_vector. +/// a concat_vector. /// /// This is a helper function of PerformBUILD_VECTORCombine. /// This function expects two 256-bit vectors called V0 and V1. /// At first, each vector is split into two separate 128-bit vectors. /// Then, the resulting 128-bit vectors are used to implement two -/// horizontal binary operations. +/// horizontal binary operations. /// /// The kind of horizontal binary operation is defined by \p X86Opcode. /// @@ -6677,18 +6676,18 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Try to match an SSE3 float HADD/HSUB. if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); - + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { // Try to match an SSSE3 integer HADD/HSUB. if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); - + if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); } - + if (!Subtarget->hasAVX()) return SDValue(); @@ -6739,7 +6738,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, // Do this only if the target has AVX2. if (Subtarget->hasAVX2()) return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); - + // Do not try to expand this build_vector into a pair of horizontal // add/sub if we can emit a pair of scalar add/sub. if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) @@ -7493,9 +7492,9 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT, /// does not check for the profitability of lowering either as PALIGNR or /// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form. /// This matches shuffle vectors that look like: -/// +/// /// v8i16 [11, 12, 13, 14, 15, 0, 1, 2] -/// +/// /// Essentially it concatenates V1 and V2, shifts right by some number of /// elements, and takes the low elements as the result. Note that while this is /// specified as a *right shift* because x86 is little-endian, it is a *left @@ -12785,7 +12784,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, /// Insert one bit to mask vector, like v16i1 or v8i1. /// AVX-512 feature. -SDValue +SDValue X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); @@ -12798,7 +12797,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { // insert element and then truncate the result. MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32); - SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, + SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec), DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); @@ -13546,7 +13545,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, } return SDValue(); } - + assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -14193,7 +14192,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; } - + SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); const Constant *C = (dyn_cast(Cst))->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); @@ -14387,7 +14386,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); } - + unsigned EltBits = EltVT.getSizeInBits(); LLVMContext *Context = DAG.getContext(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... @@ -14414,7 +14413,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted)); } - + // If not vector, then scalar. unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; @@ -14871,12 +14870,12 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, if (Op0.getValueType() == MVT::i1) llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); } - + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { - // Do the comparison at i32 if it's smaller, besides the Atom case. - // This avoids subregister aliasing issues. Keep the smaller reference - // if we're optimizing for size, however, as that'll allow better folding + // Do the comparison at i32 if it's smaller, besides the Atom case. + // This avoids subregister aliasing issues. Keep the smaller reference + // if we're optimizing for size, however, as that'll allow better folding // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( @@ -14934,7 +14933,7 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, return SDValue(); EVT VT = Op.getValueType(); - + // SSE1 has rsqrtss and rsqrtps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision @@ -14962,9 +14961,9 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // significant digits in the divisor. if (!Subtarget->useReciprocalEst()) return SDValue(); - + EVT VT = Op.getValueType(); - + // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision @@ -15783,11 +15782,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget ((Subtarget->hasDQI() && Subtarget->hasVLX() && VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) || - + ((Subtarget->hasDQI() && VT.is512BitVector() && VTElt.getSizeInBits() >= 32)))) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); - + unsigned int NumElts = VT.getVectorNumElements(); if (NumElts != 8 && NumElts != 16) @@ -16881,12 +16880,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, RoundingMode), Mask, Src0, Subtarget, DAG); - } + } case INTR_TYPE_2OP_MASK: { return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1), Op.getOperand(2)), Op.getOperand(4), Op.getOperand(3), Subtarget, DAG); - } + } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. @@ -17490,7 +17489,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, switch(IntrData->Type) { default: llvm_unreachable("Unknown Intrinsic Type"); - break; + break; case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -18600,7 +18599,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // If possible, lower this packed shift into a vector multiply instead of // expanding it into a sequence of scalar shifts. // Do this only if the vector shift count is a constant build_vector. - if (Op.getOpcode() == ISD::SHL && + if (Op.getOpcode() == ISD::SHL && (VT == MVT::v8i16 || VT == MVT::v4i32 || (Subtarget->hasInt256() && VT == MVT::v16i16)) && ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { @@ -18692,15 +18691,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, CanBeSimplified = Amt2 == Amt->getOperand(j); } } - + if (CanBeSimplified && isa(Amt1) && isa(Amt2)) { // Replace this node with two shifts followed by a MOVSS/MOVSD. EVT CastVT = MVT::v4i32; - SDValue Splat1 = + SDValue Splat1 = DAG.getConstant(cast(Amt1)->getAPIntValue(), VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); - SDValue Splat2 = + SDValue Splat2 = DAG.getConstant(cast(Amt2)->getAPIntValue(), VT); SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); if (TargetOpcode == X86ISD::MOVSD) @@ -21009,7 +21008,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addReg(restoreDstReg).addMBB(restoreMBB); // restoreMBB: - if (RegInfo->hasBasePointer(*MF)) { + if (RegInfo->hasBasePointer(*MF)) { const X86Subtarget &STI = MF->getTarget().getSubtarget(); const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo(); @@ -21094,7 +21093,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Replace 213-type (isel default) FMA3 instructions with 231-type for // accumulator loops. Writing back to the accumulator allows the coalescer -// to remove extra copies in the loop. +// to remove extra copies in the loop. MachineBasicBlock * X86TargetLowering::emitFMA3Instr(MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -22380,7 +22379,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, EVT SVT = BC0.getValueType(); unsigned Opcode = BC0.getOpcode(); unsigned NumElts = VT.getVectorNumElements(); - + if (BC0.hasOneUse() && SVT.isVector() && SVT.getVectorNumElements() * 2 == NumElts && TLI.isOperationLegal(Opcode, VT)) { @@ -23646,7 +23645,7 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, // fold (blend A, B, allOnes) -> B if (ISD::isBuildVectorAllOnes(Mask.getNode())) return Op1; - + // Simplify the case where the mask is a constant i32 value. if (ConstantSDNode *C = dyn_cast(Mask)) { if (C->isNullValue()) @@ -26314,7 +26313,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, // "load" ports instead of the dedicated "store" port. // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. - // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. + // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. if (isLegalAddressingMode(AM, Ty)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 01e9ce7078f..273b4add5e7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -26,7 +26,7 @@ class X86VectorVTInfo opc, string OpcodeStr, def Zrr : AVX5128I, EVEX, EVEX_V512; - def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; } @@ -672,7 +672,7 @@ multiclass avx512_int_broadcast_reg opc, string OpcodeStr, defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>; defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>, VEX_W; - + def : Pat <(v16i32 (X86vzext VK16WM:$mask)), (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>; @@ -710,7 +710,7 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX; def krr : AVX5128I, @@ -718,13 +718,13 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, let mayLoad = 1 in { def rm : AVX5128I, EVEX; def krm : AVX5128I, EVEX, EVEX_KZ; } } @@ -777,7 +777,7 @@ def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), (VBROADCASTSDZr VR128X:$src)>; - + // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), @@ -788,7 +788,7 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), let Predicates = [HasAVX512] in { def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))), - (EXTRACT_SUBREG + (EXTRACT_SUBREG (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), addr:$src)), sub_ymm)>; } @@ -802,7 +802,7 @@ let Predicates = [HasCDI] in def Zrr : AVX512XS8I, EVEX, EVEX_V512; - + let Predicates = [HasCDI, HasVLX] in { def Z128rr : AVX512XS8I; // -- VPERM - register form -- -multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, +multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { def rr : AVX5128I opc, string OpcodeStr, RegisterClass RC, defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, +defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; let ExeDomain = SSEPackedSingle in defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, +defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // -- VPERM2I - 3 source operands form -- @@ -1050,14 +1050,14 @@ multiclass avx512_blendmask opc, string OpcodeStr, } let ExeDomain = SSEPackedSingle in -defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", +defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem, - memopv16f32, vselect, v16f32>, + memopv16f32, vselect, v16f32>, EVEX_CD8<32, CD8VF>, EVEX_V512; let ExeDomain = SSEPackedDouble in -defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", +defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem, - memopv8f64, vselect, v8f64>, + memopv8f64, vselect, v8f64>, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1), @@ -1070,14 +1070,14 @@ def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1), (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), VR512:$src1, VR512:$src2)>; -defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", - VK16WM, VR512, f512mem, - memopv16i32, vselect, v16i32>, +defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", + VK16WM, VR512, f512mem, + memopv16i32, vselect, v16i32>, EVEX_CD8<32, CD8VF>, EVEX_V512; -defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", - VK8WM, VR512, f512mem, - memopv8i64, vselect, v8i64>, +defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", + VK8WM, VR512, f512mem, + memopv8i64, vselect, v8i64>, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1), @@ -1093,15 +1093,15 @@ def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1), let Predicates = [HasAVX512] in { def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), (v8f32 VR256X:$src2))), - (EXTRACT_SUBREG - (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (EXTRACT_SUBREG + (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (EXTRACT_SUBREG - (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), + (EXTRACT_SUBREG + (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; } @@ -1494,7 +1494,7 @@ def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR16)>; - + def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_NO_EXC)), @@ -2302,11 +2302,11 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), + def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128X:$dst, (vt (OpNode VR128X:$src1, (scalar_to_vector RC:$src2))))], @@ -2384,7 +2384,7 @@ let Predicates = [HasAVX512] in { // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSZrr (v4f32 (V_SET0)), + (VMOVSSZrr (v4f32 (V_SET0)), (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), (SUBREG_TO_REG (i32 0), @@ -2513,7 +2513,7 @@ let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set VR128X:$dst, (v2i64 (X86vzmovl + [(set VR128X:$dst, (v2i64 (X86vzmovl (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; @@ -2535,7 +2535,7 @@ let Predicates = [HasAVX512] in { (VMOV64toPQIZrr GR64:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIZrr GR32:$src)>; - + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -2946,7 +2946,7 @@ multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, def rr : AVX512BI, EVEX_4V; def rm : AVX512BI opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, + SDNode OpNode, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { def ri : AVX512Ii8 opc, string OpcodeStr, RegisterClass KRC, - RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, +multiclass avx512_vptest opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, SDNode OpNode, ValueType vt> { def rr : AVX512PI, EVEX_4V; def rm : AVX512PI, EVEX_4V; } @@ -3151,7 +3151,7 @@ def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1), // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm ri : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, defm Z : avx512_shift_rrm, EVEX_V512; } -multiclass avx512_varshift_types opcd, bits<8> opcq, string OpcodeStr, +multiclass avx512_varshift_types opcd, bits<8> opcq, string OpcodeStr, SDNode OpNode> { - defm D : avx512_varshift_sizes, EVEX_CD8<32, CD8VQ>; - defm Q : avx512_varshift_sizes, EVEX_CD8<32, CD8VQ>; + defm Q : avx512_varshift_sizes, EVEX_CD8<64, CD8VQ>, VEX_W; } @@ -3237,22 +3237,22 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, EVEX_4V; } -defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, +defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, +defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, +defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, +defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, +defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32, i512mem, memopv16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, +defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, i512mem, memopv8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; @@ -3260,7 +3260,7 @@ defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// -multiclass avx512_movddup { def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3465,9 +3465,9 @@ let ExeDomain = SSEPackedDouble in { // Scalar FMA let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, +multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag> { let isCommutable = 1 in def r : AVX512FMA3, Requires<[HasAVX512]>; -multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, +multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT, ValueType InVT, Domain d> { let hasSideEffects = 0 in { @@ -3813,7 +3813,7 @@ defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; - + def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), (VCVTPD2PSZrr VR512:$src)>; @@ -3842,7 +3842,7 @@ defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - memopv8f64, f512mem, v8i32, v8f64, + memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; @@ -3860,7 +3860,7 @@ defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uin memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>; - + // cvttpd2udq (src, 0, mask-all-ones, sae-current) def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), @@ -3870,16 +3870,16 @@ defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, memopv4i64, f256mem, v8f64, v8i32, SSEPackedDouble>, EVEX_V512, XS, EVEX_CD8<32, CD8VH>; - + defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, memopv16i32, f512mem, v16f32, v16i32, SSEPackedSingle>, EVEX_V512, XD, EVEX_CD8<32, CD8VF>; def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), - (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - + def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; @@ -3887,7 +3887,7 @@ def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; - + def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; @@ -4032,7 +4032,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } - + /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop> { @@ -4210,7 +4210,7 @@ multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { } let Predicates = [HasERI], hasSideEffects = 0 in { - + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; @@ -4249,7 +4249,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, VR128X:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2))], itins_s.rr>, XS, EVEX_4V; let mayLoad = 1 in { @@ -4263,7 +4263,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2))], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; } @@ -4277,7 +4277,7 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, VR128X:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, + [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2))], itins_s.rr>, XD, EVEX_4V, VEX_W; let mayLoad = 1 in { @@ -4291,8 +4291,8 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, (ins VR128X:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, - (F64Int VR128X:$src1, sse_load_f64:$src2))]>, + [(set VR128X:$dst, + (F64Int VR128X:$src1, sse_load_f64:$src2))]>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; } } @@ -4324,8 +4324,8 @@ multiclass avx512_sqrt_packed_all opc, string OpcodeStr, defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, +defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", + int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { @@ -4335,7 +4335,7 @@ let Predicates = [HasAVX512] in { def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), (VSQRTPDZr VR512:$src1)>; - + def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -4444,7 +4444,7 @@ let ExeDomain = GenericDomain in { (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128X:$dst, (F32Int VR128X:$src1, + [(set VR128X:$dst, (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>, EVEX_CD8<32, CD8VT1>; @@ -4536,7 +4536,7 @@ let ExeDomain = d in { defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X, SSEPackedSingle>, EVEX_CD8<32, CD8VT1>; - + defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X, SSEPackedDouble>, EVEX_CD8<64, CD8VT1>; @@ -4617,7 +4617,7 @@ multiclass avx512_trunc_sat opc, string OpcodeStr, []>, EVEX, EVEX_K; } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, +defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; @@ -4765,7 +4765,7 @@ defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; } - + defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, @@ -4830,7 +4830,7 @@ defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; - + defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; @@ -5004,7 +5004,7 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), (VPABSQZrr VR512:$src)>; -multiclass avx512_conflict opc, string OpcodeStr, +multiclass avx512_conflict opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, X86MemOperand x86scalar_mop, string BrdcstStr> { @@ -5037,7 +5037,7 @@ multiclass avx512_conflict opc, string OpcodeStr, ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}", BrdcstStr, "}"), []>, EVEX, EVEX_KZ, EVEX_B; - + let Constraints = "$src1 = $dst" in { def rrk : AVX5128I, EVEX; } - + multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in @@ -5152,5 +5152,5 @@ multiclass avx512_convert_mask_to_vector { defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr, HasDQI>, VEX_W; } - + defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0b526158529..530154b17bf 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1280,7 +1280,7 @@ let isCompare = 1 in { // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the // register class is constrained to GR8_NOREX. This pseudo is explicitly // marked side-effect free, since it doesn't have an isel pattern like - // other test instructions. + // other test instructions. let isPseudo = 1, hasSideEffects = 0 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index ee44afc39d0..e1abf26664b 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -17,13 +17,13 @@ // FPStack specific DAG Nodes. //===----------------------------------------------------------------------===// -def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, +def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, SDTCisVT<1, f80>]>; def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>, - SDTCisPtrTy<1>, + SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, - SDTCisPtrTy<1>, + SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; @@ -98,7 +98,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // All FP Stack operations are represented with four instructions here. The // first three instructions, generated by the instruction selector, use "RFP32" // "RFP64" or "RFP80" registers: traditional register files to reference 32-bit, -// 64-bit or 80-bit floating point values. These sizes apply to the values, +// 64-bit or 80-bit floating point values. These sizes apply to the values, // not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be // copied to each other without losing information. These instructions are all // pseudo instructions and use the "_Fp" suffix. @@ -107,7 +107,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // The second instruction is defined with FPI, which is the actual instruction // emitted by the assembler. These use "RST" registers, although frequently // the actual register(s) used are implicit. These are always 80 bits. -// The FP stackifier pass converts one to the other after register allocation +// The FP stackifier pass converts one to the other after register allocation // occurs. // // Note that the FpI instruction should have instruction selection info (e.g. @@ -139,66 +139,66 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // These instructions cannot address 80-bit memory. multiclass FPBinary { // ST(0) = ST(0) + [mem] -def _Fp32m : FpIf32<(outs RFP32:$dst), +def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP32:$dst, + [(set RFP32:$dst, (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>; -def _Fp64m : FpIf64<(outs RFP64:$dst), +def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP64:$dst, + [(set RFP64:$dst, (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>; -def _Fp64m32: FpIf64<(outs RFP64:$dst), +def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP64:$dst, + [(set RFP64:$dst, (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>; -def _Fp80m32: FpI_<(outs RFP80:$dst), +def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP80:$dst, + [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>; -def _Fp80m64: FpI_<(outs RFP80:$dst), +def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP80:$dst, + [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; -def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", asmstring, "{s}\t$src")> { - let mayLoad = 1; +def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), + !strconcat("f", asmstring, "{s}\t$src")> { + let mayLoad = 1; } -def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", asmstring, "{l}\t$src")> { - let mayLoad = 1; +def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), + !strconcat("f", asmstring, "{l}\t$src")> { + let mayLoad = 1; } // ST(0) = ST(0) + [memint] -def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), +def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW, [(set RFP32:$dst, (OpNode RFP32:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), +def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW, [(set RFP32:$dst, (OpNode RFP32:$src1, (X86fild addr:$src2, i32)))]>; -def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), +def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), +def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (X86fild addr:$src2, i32)))]>; -def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), +def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), +def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i32)))]>; -def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", asmstring, "{s}\t$src")> { - let mayLoad = 1; +def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), + !strconcat("fi", asmstring, "{s}\t$src")> { + let mayLoad = 1; } -def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", asmstring, "{l}\t$src")> { - let mayLoad = 1; +def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), + !strconcat("fi", asmstring, "{l}\t$src")> { + let mayLoad = 1; } } @@ -500,7 +500,7 @@ def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst", IIC_FST>; def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst", IIC_FST>; -def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), +def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst", IIC_FST>; } @@ -636,7 +636,7 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), "fxsave\t$dst", [], IIC_FXSAVE>, TB; def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), - "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB, + "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB, Requires<[In64BitMode]>; def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), "fxrstor\t$src", [], IIC_FXRSTOR>, TB; @@ -656,12 +656,12 @@ def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>; // Required for CALL which return f32 / f64 / f80 values. def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>; -def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, +def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>; def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>; -def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, +def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, RFP80:$src)>; -def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, +def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, RFP80:$src)>; def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index fe4ead1905c..d7de9219811 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -327,26 +327,26 @@ class I o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } -class Ii8 o, Format f, dag outs, dag ins, string asm, +class Ii8 o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary, Domain d = GenericDomain> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Ii8PCRel o, Format f, dag outs, dag ins, string asm, +class Ii8PCRel o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Ii16 o, Format f, dag outs, dag ins, string asm, +class Ii16 o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Ii32 o, Format f, dag outs, dag ins, string asm, +class Ii32 o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; @@ -359,14 +359,14 @@ class Ii32S o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } -class Ii16PCRel o, Format f, dag outs, dag ins, string asm, +class Ii16PCRel o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Ii32PCRel o, Format f, dag outs, dag ins, string asm, +class Ii32PCRel o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; @@ -393,14 +393,14 @@ class FpI_ pattern, // Iseg16 - 16-bit segment selector, 16-bit offset // Iseg32 - 16-bit segment selector, 32-bit offset -class Iseg16 o, Format f, dag outs, dag ins, string asm, +class Iseg16 o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Iseg32 o, Format f, dag outs, dag ins, string asm, +class Iseg32 o, Format f, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : X86Inst { let Pattern = pattern; @@ -478,7 +478,7 @@ class PIi8 o, Format F, dag outs, dag ins, string asm, } // SSE1 Instruction Templates: -// +// // SSI - SSE1 instructions with XS prefix. // PSI - SSE1 instructions with PS prefix. // PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix. @@ -509,7 +509,7 @@ class VPSI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; // SSE2 Instruction Templates: -// +// // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. // S2SI - SSE2 instructions with XS prefix. @@ -573,16 +573,16 @@ class MMXS2SIi8 o, Format F, dag outs, dag ins, string asm, : Ii8, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: -// +// // S3I - SSE3 instructions with PD prefixes. // S3SI - SSE3 instructions with XS prefix. // S3DI - SSE3 instructions with XD prefix. -class S3SI o, Format F, dag outs, dag ins, string asm, +class S3SI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, XS, Requires<[UseSSE3]>; -class S3DI o, Format F, dag outs, dag ins, string asm, +class S3DI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, XD, Requires<[UseSSE3]>; @@ -593,7 +593,7 @@ class S3I o, Format F, dag outs, dag ins, string asm, // SSSE3 Instruction Templates: -// +// // SS38I - SSSE3 instructions with T8 prefix. // SS3AI - SSSE3 instructions with TA prefix. // MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands. @@ -621,7 +621,7 @@ class MMXSS3AI o, Format F, dag outs, dag ins, string asm, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: -// +// // SS48I - SSE 4.1 instructions with T8 prefix. // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8. // @@ -635,7 +635,7 @@ class SS4AIi8 o, Format F, dag outs, dag ins, string asm, Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: -// +// // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -868,27 +868,27 @@ class VRS2I o, Format F, dag outs, dag ins, string asm, // MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix. // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. -class MMXI o, Format F, dag outs, dag ins, string asm, +class MMXI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, PS, Requires<[HasMMX]>; -class MMXI32 o, Format F, dag outs, dag ins, string asm, +class MMXI32 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, PS, Requires<[HasMMX,Not64BitMode]>; -class MMXI64 o, Format F, dag outs, dag ins, string asm, +class MMXI64 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, PS, Requires<[HasMMX,In64BitMode]>; -class MMXRI o, Format F, dag outs, dag ins, string asm, +class MMXRI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, PS, REX_W, Requires<[HasMMX]>; -class MMX2I o, Format F, dag outs, dag ins, string asm, +class MMX2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, PD, Requires<[HasMMX]>; -class MMXIi8 o, Format F, dag outs, dag ins, string asm, +class MMXIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, PS, Requires<[HasMMX]>; -class MMXID o, Format F, dag outs, dag ins, string asm, +class MMXID o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, XD, Requires<[HasMMX]>; -class MMXIS o, Format F, dag outs, dag ins, string asm, +class MMXIS o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, XS, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7f87bdd1a73..785e5ec10bb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4099,7 +4099,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; - case X86::KSET0B: + case X86::KSET0B: case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); case X86::KSET1B: case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index bcc34ea36ae..bce49d44056 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -188,7 +188,7 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; @@ -1982,7 +1982,7 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { let Predicates = [HasLZCNT] in { def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E), - (X86cmp GR16:$src, (i16 0))), + (X86cmp GR16:$src, (i16 0))), (LZCNT16rr GR16:$src)>; def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E), (X86cmp GR32:$src, (i32 0))), @@ -2001,22 +2001,22 @@ let Predicates = [HasLZCNT] in { (LZCNT64rr GR64:$src)>; def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E), - (X86cmp (loadi16 addr:$src), (i16 0))), + (X86cmp (loadi16 addr:$src), (i16 0))), (LZCNT16rm addr:$src)>; def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E), - (X86cmp (loadi32 addr:$src), (i32 0))), + (X86cmp (loadi32 addr:$src), (i32 0))), (LZCNT32rm addr:$src)>; def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E), - (X86cmp (loadi64 addr:$src), (i64 0))), + (X86cmp (loadi64 addr:$src), (i64 0))), (LZCNT64rm addr:$src)>; def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E), - (X86cmp (loadi16 addr:$src), (i16 0))), + (X86cmp (loadi16 addr:$src), (i16 0))), (LZCNT16rm addr:$src)>; def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E), - (X86cmp (loadi32 addr:$src), (i32 0))), + (X86cmp (loadi32 addr:$src), (i32 0))), (LZCNT32rm addr:$src)>; def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E), - (X86cmp (loadi64 addr:$src), (i64 0))), + (X86cmp (loadi64 addr:$src), (i64 0))), (LZCNT64rm addr:$src)>; } @@ -2117,22 +2117,22 @@ let Predicates = [HasBMI] in { (TZCNT64rr GR64:$src)>; def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E), - (X86cmp (loadi16 addr:$src), (i16 0))), + (X86cmp (loadi16 addr:$src), (i16 0))), (TZCNT16rm addr:$src)>; def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E), - (X86cmp (loadi32 addr:$src), (i32 0))), + (X86cmp (loadi32 addr:$src), (i32 0))), (TZCNT32rm addr:$src)>; def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E), - (X86cmp (loadi64 addr:$src), (i64 0))), + (X86cmp (loadi64 addr:$src), (i64 0))), (TZCNT64rm addr:$src)>; def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E), - (X86cmp (loadi16 addr:$src), (i16 0))), + (X86cmp (loadi16 addr:$src), (i16 0))), (TZCNT16rm addr:$src)>; def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E), - (X86cmp (loadi32 addr:$src), (i32 0))), + (X86cmp (loadi32 addr:$src), (i32 0))), (TZCNT32rm addr:$src)>; def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E), - (X86cmp (loadi64 addr:$src), (i64 0))), + (X86cmp (loadi64 addr:$src), (i64 0))), (TZCNT64rm addr:$src)>; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index f222be306eb..5b36427acb1 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -171,7 +171,7 @@ multiclass SS3I_binop_rm_int_mm opc, string OpcodeStr, multiclass ssse3_palign_mm { def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>, Sched<[WriteShuffle]>; def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), @@ -220,7 +220,7 @@ def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", // Data Transfer Instructions def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, + [(set VR64:$dst, (x86mmx (scalar_to_vector GR32:$src)))], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let canFoldAsLoad = 1 in @@ -254,7 +254,7 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), let SchedRW = [WriteMove] in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), - "movd\t{$src, $dst|$dst, $src}", + "movd\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>; let hasSideEffects = 0 in @@ -486,19 +486,19 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d, MMX_INTALU_ITINS>; // -- Unpack Instructions -defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", +defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", int_x86_mmx_punpckhbw, MMX_UNPCK_H_ITINS>; -defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", +defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", int_x86_mmx_punpckhwd, MMX_UNPCK_H_ITINS>; -defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", +defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", int_x86_mmx_punpckhdq, MMX_UNPCK_H_ITINS>; -defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", +defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", int_x86_mmx_punpcklbw, MMX_UNPCK_L_ITINS>; -defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", +defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", int_x86_mmx_punpcklwd, MMX_UNPCK_L_ITINS>; defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq", @@ -566,7 +566,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, IIC_MMX_PEXTR>, Sched<[WriteShuffle]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, - (outs VR64:$dst), + (outs VR64:$dst), (ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0bf765b53dd..18ba3b45197 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3099,7 +3099,7 @@ let isCodeGenOnly = 1 in { // previously we generated: // addss %xmm0, %xmm1 // movss %xmm1, %xmm0 -// +// // we now generate: // addss %xmm1, %xmm0 @@ -3321,7 +3321,7 @@ let Predicates = [HasAVX] in { // previously we generated: // addps %xmm0, %xmm1 // movss %xmm1, %xmm0 -// +// // we now generate: // addss %xmm1, %xmm0 @@ -3329,13 +3329,13 @@ let Predicates = [UseSSE1] in { def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))), (ADDSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))), (SUBSSrr_Int v4f32:$dst, v4f32:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))), (MULSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))), (DIVSSrr_Int v4f32:$dst, v4f32:$src)>; } @@ -3364,13 +3364,13 @@ let Predicates = [UseSSE41] in { def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (ADDSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (SUBSSrr_Int v4f32:$dst, v4f32:$src)>; def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (MULSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (DIVSSrr_Int v4f32:$dst, v4f32:$src)>; @@ -3435,13 +3435,13 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (VADDSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>; def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (VMULSSrr_Int v4f32:$dst, v4f32:$src)>; - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), + def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>; @@ -3980,8 +3980,8 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), let SchedRW = [WriteNop] in { // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), - "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, +def PAUSE : I<0x90, RawFrm, (outs), (ins), + "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, OBXS, Requires<[HasSSE2]>; } @@ -6428,21 +6428,21 @@ let Predicates = [HasAVX2] in { def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXDQYrm addr:$src)>; - def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 + def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VPMOVSXBDYrm addr:$src)>; - def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64 + def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), (VPMOVSXBDYrm addr:$src)>; - def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64 + def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), (VPMOVSXWQYrm addr:$src)>; - def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64 + def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), (VPMOVSXWQYrm addr:$src)>; - def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32 + def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), (VPMOVSXBQYrm addr:$src)>; } diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index ec833d756ff..3f1646907b6 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -289,11 +289,11 @@ def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), "sar{w}\t{%cl, $dst|$dst, cl}", [(store (sra (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize16; -def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), +def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t{%cl, $dst|$dst, cl}", [(store (sra (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize32; -def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), +def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t{%cl, $dst|$dst, cl}", [(store (sra (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; @@ -347,7 +347,7 @@ def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), let Uses = [CL] in def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>; - + def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t$dst", [], IIC_SR>, OpSize16; def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), @@ -381,7 +381,7 @@ def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), let Uses = [CL] in def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>; - + def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t$dst", [], IIC_SR>, OpSize16; def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), @@ -397,7 +397,7 @@ def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), let Uses = [CL] in def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32; - + def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t$dst", [], IIC_SR>; def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), @@ -493,7 +493,7 @@ def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; -def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), +def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))], @@ -600,7 +600,7 @@ def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; -def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), +def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))], @@ -635,11 +635,11 @@ def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), "ror{w}\t{%cl, $dst|$dst, cl}", [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize16; -def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), +def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t{%cl, $dst|$dst, cl}", [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize32; -def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), +def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t{%cl, $dst|$dst, cl}", [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; @@ -688,19 +688,19 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { -def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), +def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))], IIC_SHD16_REG_CL>, TB, OpSize16; -def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), +def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))], IIC_SHD16_REG_CL>, TB, OpSize16; -def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), +def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))], @@ -710,58 +710,58 @@ def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))], IIC_SHD32_REG_CL>, TB, OpSize32; -def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), +def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))], - IIC_SHD64_REG_CL>, + IIC_SHD64_REG_CL>, TB; -def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), +def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))], - IIC_SHD64_REG_CL>, + IIC_SHD64_REG_CL>, TB; } let isCommutable = 1 in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, - (outs GR16:$dst), + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, - (outs GR16:$dst), + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, - (outs GR32:$dst), + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, - (outs GR32:$dst), + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, - (outs GR64:$dst), + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, - (outs GR64:$dst), + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, @@ -789,7 +789,7 @@ def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)], IIC_SHD32_MEM_CL>, TB, OpSize32; - + def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), @@ -807,7 +807,7 @@ def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (i8 imm:$src3)), addr:$dst)], IIC_SHD16_MEM_IM>, TB, OpSize16; -def SHRD16mri8 : Ii8<0xAC, MRMDestMem, +def SHRD16mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, @@ -822,7 +822,7 @@ def SHLD32mri8 : Ii8<0xA4, MRMDestMem, (i8 imm:$src3)), addr:$dst)], IIC_SHD32_MEM_IM>, TB, OpSize32; -def SHRD32mri8 : Ii8<0xAC, MRMDestMem, +def SHRD32mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, @@ -837,7 +837,7 @@ def SHLD64mri8 : RIi8<0xA4, MRMDestMem, (i8 imm:$src3)), addr:$dst)], IIC_SHD64_MEM_IM>, TB; -def SHRD64mri8 : RIi8<0xAC, MRMDestMem, +def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 8cabdd0424d..848eeeb07c8 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -207,7 +207,7 @@ def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; -def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), +def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize16; def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -215,14 +215,14 @@ def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), OpSize16; // i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. -def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), +def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize32; def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize32; // i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. -def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), +def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB; def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB; @@ -240,7 +240,7 @@ def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize32; def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; + "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB; @@ -260,7 +260,7 @@ def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; - + def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", [], IIC_PUSH_SR>, OpSize16, Requires<[Not64BitMode]>; @@ -347,31 +347,31 @@ def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16; def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32; - + def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; - + def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16; def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32; - + def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; - + def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; - + def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; @@ -408,7 +408,7 @@ def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), "sldt{w}\t$dst", [], IIC_SLDT>, TB; def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), "sldt{l}\t$dst", [], IIC_SLDT>, OpSize32, TB; - + // LLDT is not interpreted specially in 64-bit mode because there is no sign // extension. def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), @@ -444,12 +444,12 @@ let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>, TB; -def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), +def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), "smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB; -def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), +def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), "smsw{l}\t$dst", [], IIC_SMSW>, OpSize32, TB; // no m form encodable; use SMSW16m -def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), +def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), "smsw{q}\t$dst", [], IIC_SMSW>, TB; // For memory operands, there is only a 16-bit form diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index e2c31b27949..d4b1b5e0db3 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -20,7 +20,7 @@ enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, - CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, + CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM }; @@ -51,7 +51,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0), X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0), X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0), - + X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0), @@ -60,7 +60,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0), X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0), - + X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH, X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm), X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH, @@ -69,7 +69,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm), X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH, X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm), - + X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), @@ -78,7 +78,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0), - + X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm), X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, @@ -87,7 +87,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm), X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm), - + X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0), @@ -97,7 +97,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0), X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0), X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0), - + X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0), X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0), X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0), diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 74b60bbc5bf..a8330f4dd75 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -390,9 +390,8 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, Inst.addOperand(Seg); } -static unsigned getRetOpcode(const X86Subtarget &Subtarget) -{ - return Subtarget.is64Bit() ? X86::RETQ : X86::RETL; +static unsigned getRetOpcode(const X86Subtarget &Subtarget) { + return Subtarget.is64Bit() ? X86::RETQ : X86::RETL; } void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { @@ -856,7 +855,7 @@ static void LowerSTATEPOINT(MCStreamer &OS, StackMaps &SM, // Record our statepoint node in the same section used by STACKMAP // and PATCHPOINT - SM.recordStatepoint(MI); + SM.recordStatepoint(MI); } @@ -1085,7 +1084,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case TargetOpcode::STATEPOINT: return LowerSTATEPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), TM, getSubtargetInfo(), MCInstLowering); - + case TargetOpcode::STACKMAP: return LowerSTACKMAP(*MI); diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp index 9518395916d..ac2cdc8c656 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -20,7 +20,7 @@ void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) { const X86RegisterInfo *RegInfo = static_cast( MF->getSubtarget().getRegisterInfo()); unsigned SlotSize = RegInfo->getSlotSize(); - for (const MCPhysReg *CSR = + for (const MCPhysReg *CSR = RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF); unsigned Reg = *CSR; ++CSR) diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 2bd37eb16e0..42f449063f7 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -31,12 +31,12 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// contains stack pointer re-alignment code which requires FP. bool ForceFramePointer; - /// RestoreBasePointerOffset - Non-zero if the function has base pointer - /// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a - /// displacement from the frame pointer to a slot where the base pointer - /// is stashed. + /// RestoreBasePointerOffset - Non-zero if the function has base pointer + /// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a + /// displacement from the frame pointer to a slot where the base pointer + /// is stashed. signed char RestoreBasePointerOffset; - + /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes. unsigned CalleeSavedFrameSize; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index fda82cd689b..5b0207d1b31 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -161,7 +161,7 @@ protected: /// True if unaligned 32-byte memory accesses are slow. bool IsUAMem32Slow; - + /// HasVectorUAMem - True if SIMD operations can have unaligned memory /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; @@ -208,7 +208,7 @@ protected: /// For this to be profitable, the cost of FDIV must be /// substantially higher than normal FP ops like FADD and FMUL. bool UseReciprocalEst; - + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 2b70fd0ecf8..9f9fb350bdc 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -352,7 +352,7 @@ unsigned X86TTI::getArithmeticInstrCost( { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized. - { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized. { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized. { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized. @@ -525,7 +525,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or }; - + if (ST->hasSSSE3()) { int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx != -1) @@ -538,7 +538,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd - + // This is expanded into a long sequence of four extract + four insert. {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. @@ -546,7 +546,7 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} }; - // Fall-back (SSE3 and SSE2). + // Fall-back (SSE3 and SSE2). int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx != -1) return LT.first * SSEAltShuffleTbl[Idx].Cost; @@ -930,17 +930,17 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, bool IsPairwise) const { - + std::pair LT = TLI->getTypeLegalizationCost(ValTy); - + MVT MTy = LT.second; - + int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - - // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput - // and make it as the cost. - + + // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput + // and make it as the cost. + static const CostTblEntry SSE42CostTblPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, @@ -948,7 +948,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". { ISD::ADD, MVT::v8i16, 5 }, }; - + static const CostTblEntry AVX1CostTblPairWise[] = { { ISD::FADD, MVT::v4f32, 4 }, { ISD::FADD, MVT::v4f64, 5 }, @@ -967,7 +967,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". }; - + static const CostTblEntry AVX1CostTblNoPairWise[] = { { ISD::FADD, MVT::v4f32, 3 }, { ISD::FADD, MVT::v4f64, 3 }, @@ -978,14 +978,14 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i16, 4 }, { ISD::ADD, MVT::v8i32, 5 }, }; - + if (IsPairwise) { if (ST->hasAVX()) { int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy); if (Idx != -1) return LT.first * AVX1CostTblPairWise[Idx].Cost; } - + if (ST->hasSSE42()) { int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy); if (Idx != -1) @@ -997,7 +997,7 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy, if (Idx != -1) return LT.first * AVX1CostTblNoPairWise[Idx].Cost; } - + if (ST->hasSSE42()) { int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy); if (Idx != -1)