X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=inline;f=lib%2FTarget%2FX86%2FX86ISelDAGToDAG.cpp;h=df8de82c14b99511d1d72cb3faf0d922a9f9d17d;hb=36fdcfb93ada212cea2b363629dc63e558707ca1;hp=5a19f8ab981e59aaf96221a48f854fe2703808e8;hpb=6765834754cbb3cb0f15b4b15e98c5e73fa50066;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 5a19f8ab981..df8de82c14b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,33 +12,33 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include using namespace llvm; +#define DEBUG_TYPE "x86-isel" + STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -67,22 +67,24 @@ namespace { const Constant *CP; const BlockAddress *BlockAddr; const char *ES; + MCSymbol *MCSym; int JT; unsigned Align; // CP alignment. unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), - SymbolFlags(X86II::MO_NO_FLAG) { - } + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), + Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), + MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {} bool hasSymbolicDisplacement() const { - return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; + return GV != nullptr || CP != nullptr || ES != nullptr || + MCSym != nullptr || JT != -1 || BlockAddr != nullptr; } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; + return BaseType == FrameIndexBase || + IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -104,14 +106,14 @@ namespace { void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; - if (Base_Reg.getNode() != 0) + if (Base_Reg.getNode()) Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; - if (IndexReg.getNode() != 0) + if (IndexReg.getNode()) IndexReg.getNode()->dump(); else dbgs() << "nul"; @@ -132,6 +134,11 @@ namespace { dbgs() << ES; else dbgs() << "nul"; + dbgs() << " MCSym "; + if (MCSym) + dbgs() << MCSym; + else + dbgs() << "nul"; dbgs() << " JT" << JT << " Align" << Align << '\n'; } #endif @@ -143,11 +150,7 @@ namespace { /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. /// - class X86DAGToDAGISel : public SelectionDAGISel { - /// X86Lowering - This object fully describes how to lower LLVM code to an - /// X86-specific SelectionDAG. - const X86TargetLowering &X86Lowering; - + class X86DAGToDAGISel final : public SelectionDAGISel { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; @@ -158,20 +161,24 @@ namespace { public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), - X86Lowering(*tm.getTargetLowering()), - Subtarget(&tm.getSubtarget()), - OptForSize(false) {} + : SelectionDAGISel(tm, OptLevel), OptForSize(false) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "X86 DAG->DAG Instruction Selection"; } - virtual void EmitFunctionEntryCode(); + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &MF.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + + void EmitFunctionEntryCode() override; - virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; - virtual void PreprocessISelDAG(); + void PreprocessISelDAG() override; inline bool immSext8(SDNode *N) const { return isInt<8>(cast(N)->getSExtValue()); @@ -188,10 +195,9 @@ namespace { #include "X86GenDAGISel.inc" private: - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectGather(SDNode *N, unsigned Opc); - SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); @@ -203,9 +209,16 @@ namespace { bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); + bool SelectMOV64Imm32(SDValue N, SDValue &Imm); bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); + bool SelectLEA64_32Addr(SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -222,24 +235,27 @@ namespace { /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps); - - void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); - - inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, - SDValue &Scale, SDValue &Index, - SDValue &Disp, SDValue &Segment) { - Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : - AM.Base_Reg; - Scale = getI8Imm(AM.Scale); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + unsigned ConstraintID, + std::vector &OutOps) override; + + void EmitSpecialCodeForMain(); + + inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment) { + Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) + ? CurDAG->getTargetFrameIndex( + AM.Base_FrameIndex, + TLI->getPointerTy(CurDAG->getDataLayout())) + : AM.Base_Reg; + Scale = getI8Imm(AM.Scale, DL); Index = AM.IndexReg; // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(), + Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), MVT::i32, AM.Disp, AM.SymbolFlags); else if (AM.CP) @@ -248,6 +264,10 @@ namespace { else if (AM.ES) { assert(!AM.Disp && "Non-zero displacement is ignored with ES."); Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); + } else if (AM.MCSym) { + assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); + assert(AM.SymbolFlags == 0 && "oo"); + Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); } else if (AM.JT != -1) { assert(!AM.Disp && "Non-zero displacement is ignored with JT."); Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); @@ -255,7 +275,7 @@ namespace { Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, AM.SymbolFlags); else - Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); + Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); if (AM.Segment.getNode()) Segment = AM.Segment; @@ -263,16 +283,92 @@ namespace { Segment = CurDAG->getRegister(0, MVT::i32); } + // Utility function to determine whether we should avoid selecting + // immediate forms of instructions for better code size or not. + // At a high level, we'd like to avoid such instructions when + // we have similar constants used within the same basic block + // that can be kept in a register. + // + bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { + uint32_t UseCount = 0; + + // Do not want to hoist if we're not optimizing for size. + // TODO: We'd like to remove this restriction. + // See the comment in X86InstrInfo.td for more info. + if (!OptForSize) + return false; + + // Walk all the users of the immediate. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { + + SDNode *User = *UI; + + // This user is already selected. Count it as a legitimate use and + // move on. + if (User->isMachineOpcode()) { + UseCount++; + continue; + } + + // We want to count stores of immediates as real uses. + if (User->getOpcode() == ISD::STORE && + User->getOperand(1).getNode() == N) { + UseCount++; + continue; + } + + // We don't currently match users that have > 2 operands (except + // for stores, which are handled above) + // Those instruction won't match in ISEL, for now, and would + // be counted incorrectly. + // This may change in the future as we add additional instruction + // types. + if (User->getNumOperands() != 2) + continue; + + // Immediates that are used for offsets as part of stack + // manipulation should be left alone. These are typically + // used to indicate SP offsets for argument passing and + // will get pulled into stores/pushes (implicitly). + if (User->getOpcode() == X86ISD::ADD || + User->getOpcode() == ISD::ADD || + User->getOpcode() == X86ISD::SUB || + User->getOpcode() == ISD::SUB) { + + // Find the other operand of the add/sub. + SDValue OtherOp = User->getOperand(0); + if (OtherOp.getNode() == N) + OtherOp = User->getOperand(1); + + // Don't count if the other operand is SP. + RegisterSDNode *RegNode; + if (OtherOp->getOpcode() == ISD::CopyFromReg && + (RegNode = dyn_cast_or_null( + OtherOp->getOperand(1).getNode()))) + if ((RegNode->getReg() == X86::ESP) || + (RegNode->getReg() == X86::RSP)) + continue; + } + + // ... otherwise, count this and move on. + UseCount++; + } + + // If we have more than 1 use, then recommend for hoisting. + return (UseCount > 1); + } + /// getI8Imm - Return a target constant with the specified value, of type /// i8. - inline SDValue getI8Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i8); + inline SDValue getI8Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm, DL, MVT::i8); } /// getI32Imm - Return a target constant with the specified value, of type /// i32. - inline SDValue getI32Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + inline SDValue getI32Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -283,14 +379,21 @@ namespace { /// getTargetMachine - Return a reference to the TargetMachine, casted /// to the target-specific type. - const X86TargetMachine &getTargetMachine() { + const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } /// getInstrInfo - Return a reference to the TargetInstrInfo, casted /// to the target-specific type. - const X86InstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); + const X86InstrInfo *getInstrInfo() const { + return Subtarget->getInstrInfo(); + } + + /// \brief Address-mode matching performs shift-of-and to and-of-shift + /// reassociation in order to expose more scaled addressing + /// opportunities. + bool ComplexPatternFuncMutatesDAG() const override { + return true; } }; } @@ -346,7 +449,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { // addl %gs:0, %eax // if the block also has an access to a second TLS address this will save // a load. - // FIXME: This is probably also true for non TLS addresses. + // FIXME: This is probably also true for non-TLS addresses. if (Op1.getOpcode() == X86ISD::Wrapper) { SDValue Val = Op1.getOperand(0); if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) @@ -376,23 +479,19 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, else Ops.push_back(Chain.getOperand(i)); SDValue NewChain = - CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), - MVT::Other, &Ops[0], Ops.size()); + CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); Ops.clear(); Ops.push_back(NewChain); } - for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) - Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end()); + CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); - unsigned NumOps = Call.getNode()->getNumOperands(); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); - for (unsigned i = 1, e = NumOps; i != e; ++i) - Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); + Ops.append(Call->op_begin() + 1, Call->op_end()); + CurDAG->UpdateNodeOperands(Call.getNode(), Ops); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -423,6 +522,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (!Chain.getNumOperands()) return false; + // Since we are not checking for AA here, conservatively abort if the chain + // writes to memory. It's not safe to move the callee (a load) across a store. + if (isa(Chain.getNode()) && + cast(Chain.getNode())->writeMem()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && @@ -434,17 +538,18 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + OptForSize = MF->getFunction()->optForSize(); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - (N->getOpcode() == X86ISD::CALL || + // Only does this when target favors doesn't favor register indirect + // call. + ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be foled into TC_RETURN. + // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just @@ -487,8 +592,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - EVT SrcVT = N->getOperand(0).getValueType(); - EVT DstVT = N->getValueType(0); + MVT SrcVT = N->getOperand(0).getSimpleValueType(); + MVT DstVT = N->getSimpleValueType(0); // If any of the sources are vectors, no fp stack involved. if (SrcVT.isVector() || DstVT.isVector()) @@ -496,8 +601,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); - bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); + const X86TargetLowering *X86Lowering = + static_cast(TLI); + bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); + bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) continue; @@ -513,14 +620,14 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. - EVT MemVT; + MVT MemVT; if (N->getOpcode() == ISD::FP_ROUND) MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, @@ -529,7 +636,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, MachinePointerInfo(), - MemVT, false, false, 0); + MemVT, false, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -548,14 +655,19 @@ void X86DAGToDAGISel::PreprocessISelDAG() { /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. -void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, - MachineFrameInfo *MFI) { - const TargetInstrInfo *TII = TM.getInstrInfo(); +void X86DAGToDAGISel::EmitSpecialCodeForMain() { if (Subtarget->isTargetCygMing()) { - unsigned CallOp = - Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; - BuildMI(BB, DebugLoc(), - TII->get(CallOp)).addExternalSymbol("__main"); + TargetLowering::ArgListTy Args; + auto &DL = CurDAG->getDataLayout(); + + TargetLowering::CallLoweringInfo CLI(*CurDAG); + CLI.setChain(CurDAG->getRoot()) + .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), + CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), + std::move(Args), 0); + const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); + std::pair Result = TLI.LowerCallTo(CLI); + CurDAG->setRoot(Result.second); } } @@ -563,7 +675,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. if (const Function *Fn = MF->getFunction()) if (Fn->hasExternalLinkage() && Fn->getName() == "main") - EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); + EmitSpecialCodeForMain(); } static bool isDispSafeForFrameIndex(int64_t Val) { @@ -578,6 +690,9 @@ static bool isDispSafeForFrameIndex(int64_t Val) { bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM) { + // Cannot combine ExternalSymbol displacements with integer offsets. + if (Offset != 0 && (AM.ES || AM.MCSym)) + return true; int64_t Val = AM.Disp + Offset; CodeModel::Model M = TM.getCodeModel(); if (Subtarget->is64Bit()) { @@ -605,7 +720,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // gs:0 (or fs:0 on X86-64) contains its own address. // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast(Address)) - if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && + if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: @@ -662,6 +777,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (ExternalSymbolSDNode *S = dyn_cast(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); + } else if (auto *S = dyn_cast(N0)) { + AM.MCSym = S->getMCSymbol(); } else if (JumpTableSDNode *J = dyn_cast(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); @@ -700,6 +817,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (ExternalSymbolSDNode *S = dyn_cast(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); + } else if (auto *S = dyn_cast(N0)) { + AM.MCSym = S->getMCSymbol(); } else if (JumpTableSDNode *J = dyn_cast(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); @@ -726,7 +845,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -738,8 +857,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); @@ -760,9 +879,10 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { } } -// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This -// allows us to convert the shift and and into an h-register extract and -// a scaled index. Returns false if the simplification is performed. +// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if +// safe. This allows us to convert the shift and and into an h-register +// extract and a scaled index. Returns false if the simplification is +// performed. static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, @@ -777,13 +897,13 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, Mask != (0xffu << ScaleLog)) return true; - EVT VT = N.getValueType(); - DebugLoc DL = N.getDebugLoc(); - SDValue Eight = DAG.getConstant(8, MVT::i8); - SDValue NewMask = DAG.getConstant(0xff, VT); + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); + SDValue Eight = DAG.getConstant(8, DL, MVT::i8); + SDValue NewMask = DAG.getConstant(0xff, DL, VT); SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight); SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask); - SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8); + SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount); // Insert the new nodes into the topological ordering. We must do this in @@ -825,9 +945,9 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) return true; - EVT VT = N.getValueType(); - DebugLoc DL = N.getDebugLoc(); - SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT); + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); + SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); @@ -882,8 +1002,8 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, return true; unsigned ShiftAmt = Shift.getConstantOperandVal(1); - unsigned MaskLZ = CountLeadingZeros_64(Mask); - unsigned MaskTZ = CountTrailingZeros_64(Mask); + unsigned MaskLZ = countLeadingZeros(Mask); + unsigned MaskTZ = countTrailingZeros(Mask); // The amount of shift we're trying to fit into the addressing mode is taken // from the trailing zeros of the mask. @@ -894,11 +1014,11 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true; // We also need to ensure that mask is a continuous run of bits. - if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; + if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; // Scale the leading zero count down based on the actual size of the value. // Also scale it down based on the size of the shift. - MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt; + MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; // The final check is to ensure that any masked out high bits of X are // already known to be zero. Otherwise, the mask has a semantic impact @@ -908,34 +1028,34 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // replace them with zero extensions cheaply if necessary. bool ReplacingAnyExtend = false; if (X.getOpcode() == ISD::ANY_EXTEND) { - unsigned ExtendBits = - X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits(); + unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - + X.getOperand(0).getSimpleValueType().getSizeInBits(); // Assume that we'll replace the any-extend with a zero-extend, and // narrow the search to the extended value. X = X.getOperand(0); MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; ReplacingAnyExtend = true; } - APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), - MaskLZ); + APInt MaskedHighBits = + APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, KnownZero, KnownOne); + DAG.computeKnownBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift // and an addressing mode. Make it so. - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (ReplacingAnyExtend) { assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. - SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X); + SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); InsertDAGNode(DAG, N, NewX); X = NewX; } - DebugLoc DL = N.getDebugLoc(); - SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); + SDLoc DL(N); + SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); - SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8); + SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); // Insert the new nodes into the topological ordering. We must do this in @@ -956,7 +1076,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { - DebugLoc dl = N.getDebugLoc(); + SDLoc dl(N); DEBUG({ dbgs() << "MatchAddress: "; AM.dump(); @@ -972,7 +1092,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // FIXME: JumpTable and ExternalSymbol address currently don't like // displacements. It isn't very important, but this should be fixed for // consistency. - if (!AM.ES && AM.JT != -1) return true; + if (!(AM.ES || AM.MCSym) && AM.JT != -1) + return true; if (ConstantSDNode *Cst = dyn_cast(N)) if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM)) @@ -982,6 +1103,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, switch (N.getOpcode()) { default: break; + case ISD::LOCAL_RECOVER: { + if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) + if (const auto *ESNode = dyn_cast(N.getOperand(0))) { + // Use the symbol and don't prefix it. + AM.MCSym = ESNode->getMCSymbol(); + return false; + } + break; + } case ISD::Constant: { uint64_t Val = cast(N)->getSExtValue(); if (!FoldOffsetIntoAddress(Val, AM)) @@ -1002,7 +1132,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast(N)->getIndex(); @@ -1011,7 +1141,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::SHL: - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; if (ConstantSDNode @@ -1040,12 +1170,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal; return false; } - break; } + break; case ISD::SRL: { // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue And = N.getOperand(0); if (And.getOpcode() != ISD::AND) break; @@ -1053,7 +1183,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; // The mask used for the transform is expected to be post-shift, but we // found the shift first so just apply the shift to the mask before passing @@ -1079,8 +1209,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr) { if (ConstantSDNode *CN = dyn_cast(N.getNode()->getOperand(1))) if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || @@ -1167,7 +1297,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } // Ok, the transformation is legal and appears profitable. Go for it. - SDValue Zero = CurDAG->getConstant(0, N.getValueType()); + SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType()); SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); AM.IndexReg = Neg; AM.Scale = 1; @@ -1230,7 +1360,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // with a constant to enable use of the scaled offset field. // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue Shift = N.getOperand(0); if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break; @@ -1238,7 +1368,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // We only handle up to 64-bit values here as those are what matter for // addressing mode optimizations. - if (X.getValueSizeInBits() > 64) break; + if (X.getSimpleValueType().getSizeInBits() > 64) break; if (!isa(N.getOperand(1))) break; @@ -1269,7 +1399,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. - if (AM.IndexReg.getNode() == 0) { + if (!AM.IndexReg.getNode()) { AM.IndexReg = N; AM.Scale = 1; return false; @@ -1285,6 +1415,42 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } +bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + + MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); + if (!Mgs) + return false; + X86ISelAddressMode AM; + unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace(); + // AddrSpace 256 -> GS, 257 -> FS. + if (AddrSpace == 256) + AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); + if (AddrSpace == 257) + AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); + + SDLoc DL(N); + Base = Mgs->getBasePtr(); + Index = Mgs->getIndex(); + unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits(); + Scale = getI8Imm(ScalarSize/8, DL); + + // If Base is 0, the whole address is in index and the Scale is 1 + if (isa(Base)) { + assert(dyn_cast(Base)->isNullValue() && + "Unexpected base in gather/scatter"); + Scale = getI8Imm(1, DL); + Base = CurDAG->getRegister(0, MVT::i32); + } + if (AM.Segment.getNode()) + Segment = AM.Segment; + else + Segment = CurDAG->getRegister(0, MVT::i32); + Disp = CurDAG->getTargetConstant(0, DL, MVT::i32); + return true; +} + /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. @@ -1302,7 +1468,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, // that are not a MemSDNode, and thus don't have proper addrspace info. Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores - Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + Parent->getOpcode() != X86ISD::TLSCALL && // Fixme + Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp + Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = cast(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS. @@ -1315,7 +1483,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1324,7 +1492,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (!AM.IndexReg.getNode()) AM.IndexReg = CurDAG->getRegister(0, VT); - getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); return true; } @@ -1374,6 +1542,73 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, } +bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { + if (const ConstantSDNode *CN = dyn_cast(N)) { + uint64_t ImmVal = CN->getZExtValue(); + if ((uint32_t)ImmVal != (uint64_t)ImmVal) + return false; + + Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64); + return true; + } + + // In static codegen with small code model, we can get the address of a label + // into a register with 'movl'. TableGen has already made sure we're looking + // at a label of some kind. + assert(N->getOpcode() == X86ISD::Wrapper && + "Unexpected node type for MOV32ri64"); + N = N.getOperand(0); + + if (N->getOpcode() != ISD::TargetConstantPool && + N->getOpcode() != ISD::TargetJumpTable && + N->getOpcode() != ISD::TargetGlobalAddress && + N->getOpcode() != ISD::TargetExternalSymbol && + N->getOpcode() != ISD::MCSymbol && + N->getOpcode() != ISD::TargetBlockAddress) + return false; + + Imm = N; + return TM.getCodeModel() == CodeModel::Small; +} + +bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp, SDValue &Segment) { + if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment)) + return false; + + SDLoc DL(N); + RegisterSDNode *RN = dyn_cast(Base); + if (RN && RN->getReg() == 0) + Base = CurDAG->getRegister(0, MVT::i64); + else if (Base.getValueType() == MVT::i32 && !dyn_cast(Base)) { + // Base could already be %rip, particularly in the x32 ABI. + Base = SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(0, DL, MVT::i64), + Base, + CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)), + 0); + } + + RN = dyn_cast(Index); + if (RN && RN->getReg() == 0) + Index = CurDAG->getRegister(0, MVT::i64); + else { + assert(Index.getValueType() == MVT::i32 && + "Expect to be extending 32-bit registers for use in LEA"); + Index = SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, + CurDAG->getTargetConstant(0, DL, MVT::i64), + Index, + CurDAG->getTargetConstant(X86::sub_32bit, DL, + MVT::i32)), + 0); + } + + return true; +} + /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, @@ -1392,7 +1627,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, assert (T == AM.Segment); AM.Segment = Copy; - EVT VT = N.getValueType(); + MVT VT = N.getSimpleValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base_Reg.getNode()) @@ -1433,7 +1668,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, if (Complexity <= 2) return false; - getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); return true; } @@ -1457,7 +1692,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment); return true; } @@ -1481,26 +1716,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); -} - -SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { - SDValue Chain = Node->getOperand(0); - SDValue In1 = Node->getOperand(1); - SDValue In2L = Node->getOperand(2); - SDValue In2H = Node->getOperand(3); - - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return NULL; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); - const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); - cast(ResNode)->setMemRefs(MemOp, MemOp + 1); - return ResNode; + auto &DL = MF->getDataLayout(); + return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); } /// Atomic opcode table @@ -1632,18 +1849,25 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { // + empty, the operand is not needed any more with the new op selected. // + non-empty, otherwise. static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, - DebugLoc dl, - enum AtomicOpc &Op, EVT NVT, - SDValue Val) { + SDLoc dl, + enum AtomicOpc &Op, MVT NVT, + SDValue Val, + const X86Subtarget *Subtarget) { if (ConstantSDNode *CN = dyn_cast(Val)) { int64_t CNVal = CN->getSExtValue(); // Quit if not 32-bit imm. if ((int32_t)CNVal != CNVal) return Val; + // Quit if INT32_MIN: it would be negated as it is negative and overflow, + // producing an immediate that does not fit in the 32 bits available for + // an immediate operand to sub. However, it still fits in 32 bits for the + // add (since it is not negated) so we can return target-constant. + if (CNVal == INT32_MIN) + return CurDAG->getTargetConstant(CNVal, dl, NVT); // For atomic-load-add, we could do some optimizations. if (Op == ADD) { // Translate to INC/DEC if ADD by 1 or -1. - if ((CNVal == 1) || (CNVal == -1)) { + if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) { Op = (CNVal == 1) ? INC : DEC; // No more constant operand after being translated into INC/DEC. return SDValue(); @@ -1654,7 +1878,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, CNVal = -CNVal; } } - return CurDAG->getTargetConstant(CNVal, NVT); + return CurDAG->getTargetConstant(CNVal, dl, NVT); } // If the value operand is single-used, try to optimize it. @@ -1680,11 +1904,11 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, return Val; } -SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { +SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) - return 0; + return nullptr; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" @@ -1692,15 +1916,15 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { SDValue Chain = Node->getOperand(0); SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; + SDValue Base, Scale, Index, Disp, Segment; + if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) + return nullptr; // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { default: - return 0; + return nullptr; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1714,14 +1938,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Op = ADD; break; } - - Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); + + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget); bool isUnOp = !Val.getNode(); bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; + switch (NVT.SimpleTy) { + default: return nullptr; case MVT::i8: if (isCN) Opc = AtomicOpcTbl[Op][ConstantI8]; @@ -1747,35 +1971,42 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Opc = AtomicOpcTbl[Op][I32]; break; case MVT::i64: - Opc = AtomicOpcTbl[Op][I64]; if (isCN) { if (immSext8(Val.getNode())) Opc = AtomicOpcTbl[Op][SextConstantI64]; else if (i64immSExt32(Val.getNode())) Opc = AtomicOpcTbl[Op][ConstantI64]; - } + else + llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith"); + } else + Opc = AtomicOpcTbl[Op][I64]; break; } assert(Opc != 0 && "Invalid arith lock transform!"); + // Building the new node. SDValue Ret; - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); if (isUnOp) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } else { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } + + // Copying the MachineMemOperand. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(Node)->getMemOperand(); cast(Ret)->setMemRefs(MemOp, MemOp + 1); + + // We need to have two outputs as that is what the original instruction had. + // So we add a dummy, undefined output. This is safe as we checked first + // that no-one uses our output anyway. + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, NVT), 0); SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + return CurDAG->getMergeValues(RetVals, dl).getNode(); } /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has @@ -1805,8 +2036,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: - case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: + case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1: + case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1917,8 +2148,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, if (ChainCheck) // Make a new TokenFactor with all the other input chains except // for the load. - InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), - MVT::Other, &ChainOps[0], ChainOps.size()); + InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), + MVT::Other, ChainOps); } if (!ChainCheck) return false; @@ -1955,18 +2186,19 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue VMask = Node->getOperand(5); ConstantSDNode *Scale = dyn_cast(Node->getOperand(6)); if (!Scale) - return 0; + return nullptr; SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), MVT::Other); + SDLoc DL(Node); + // Memory Operands: Base, Scale, Index, Disp, Segment - SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); + SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32); SDValue Segment = CurDAG->getRegister(0, MVT::i32); - const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, + const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -1977,20 +2209,42 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { } SDNode *X86DAGToDAGISel::Select(SDNode *Node) { - EVT NVT = Node->getValueType(0); + MVT NVT = Node->getSimpleValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); - return NULL; // Already selected. + Node->setNodeId(-1); + return nullptr; // Already selected. } switch (Opcode) { default: break; + case ISD::BRIND: { + if (Subtarget->isTargetNaCl()) + // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We + // leave the instruction alone. + break; + if (Subtarget->isTarget64BitILP32()) { + // Converts a 32-bit register to a 64-bit, zero-extended version of + // it. This is needed because x86-64 can do many things, but jmp %r32 + // ain't one of them. + const SDValue &Target = Node->getOperand(1); + assert(Target.getSimpleValueType() == llvm::MVT::i32); + SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64)); + SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other, + Node->getOperand(0), ZextTarget); + ReplaceUses(SDValue(Node, 0), Brind); + SelectCode(ZextTarget.getNode()); + SelectCode(Brind.getNode()); + return nullptr; + } + break; + } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { @@ -2011,6 +2265,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_d_d_256: case Intrinsic::x86_avx2_gather_q_d: case Intrinsic::x86_avx2_gather_q_d_256: { + if (!Subtarget->hasAVX2()) + break; unsigned Opc; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); @@ -2034,7 +2290,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *RetVal = SelectGather(Node, Opc); if (RetVal) // We already called ReplaceUses inside SelectGather. - return NULL; + return nullptr; break; } } @@ -2043,37 +2299,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::GlobalBaseReg: return getGlobalBaseReg(); - - case X86ISD::ATOMOR64_DAG: - case X86ISD::ATOMXOR64_DAG: - case X86ISD::ATOMADD64_DAG: - case X86ISD::ATOMSUB64_DAG: - case X86ISD::ATOMNAND64_DAG: - case X86ISD::ATOMAND64_DAG: - case X86ISD::ATOMMAX64_DAG: - case X86ISD::ATOMMIN64_DAG: - case X86ISD::ATOMUMAX64_DAG: - case X86ISD::ATOMUMIN64_DAG: - case X86ISD::ATOMSWAP64_DAG: { - unsigned Opc; - switch (Opcode) { - default: llvm_unreachable("Impossible opcode"); - case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; - case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; - case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; - case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; - case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; - case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; - case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; - case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; - case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; - case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; - case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; - } - SDNode *RetVal = SelectAtomic64(Node, Opc); - if (RetVal) - return RetVal; - break; + case X86ISD::SHRUNKBLEND: { + // SHRUNKBLEND selects like a regular VSELECT. + SDValue VSelect = CurDAG->getNode( + ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2)); + ReplaceUses(SDValue(Node, 0), VSelect); + SelectCode(VSelect.getNode()); + // We already called ReplaceUses. + return nullptr; } case ISD::ATOMIC_LOAD_XOR: @@ -2114,8 +2348,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; - unsigned ShlOp, Op; - EVT CstVT = NVT; + unsigned ShlOp, AddOp, Op; + MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. // TODO: AND32ri is the same as AND64ri32 with zext imm. @@ -2130,11 +2364,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (NVT == CstVT) break; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i32: assert(CstVT == MVT::i8); ShlOp = X86::SHL32ri; + AddOp = X86::ADD32rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2146,6 +2381,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: assert(CstVT == MVT::i8 || CstVT == MVT::i32); ShlOp = X86::SHL64ri; + AddOp = X86::ADD64rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2157,17 +2393,39 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Emit the smaller op and the shift. - SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); + SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT); SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + if (ShlVal == 1) + return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + SDValue(New, 0)); return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), - getI8Imm(ShlVal)); + getI8Imm(ShlVal, dl)); + } + case X86ISD::UMUL8: + case X86ISD::SMUL8: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + return nullptr; } + case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); unsigned LoReg; - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; @@ -2180,12 +2438,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); - return NULL; + return nullptr; } case ISD::SMUL_LOHI: @@ -2196,7 +2454,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { bool isSigned = Opcode == ISD::SMUL_LOHI; bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; @@ -2206,7 +2464,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; @@ -2261,16 +2519,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag }; if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); Chain = SDValue(CNode, 2); InFlag = SDValue(CNode, 3); } else { SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); Chain = SDValue(CNode, 0); InFlag = SDValue(CNode, 1); } @@ -2281,15 +2537,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { N1, InFlag }; if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); InFlag = SDValue(CNode, 2); } else { SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); InFlag = SDValue(CNode, 0); } } @@ -2309,14 +2563,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Shift AX down 8 bits. Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); + CurDAG->getTargetConstant(8, dl, MVT::i8)), + 0); // Then truncate it down to i8. ReplaceUses(SDValue(Node, 1), CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - if (ResLo.getNode() == 0) { + if (!ResLo.getNode()) { assert(LoReg && "Register for low half is not defined!"); ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); @@ -2327,7 +2582,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - if (ResHi.getNode() == 0) { + if (!ResHi.getNode()) { assert(HiReg && "Register for high half is not defined!"); ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, InFlag); @@ -2337,17 +2592,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case ISD::SDIVREM: - case ISD::UDIVREM: { + case ISD::UDIVREM: + case X86ISD::SDIVREM8_SEXT_HREG: + case X86ISD::UDIVREM8_ZEXT_HREG: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - bool isSigned = Opcode == ISD::SDIVREM; + bool isSigned = (Opcode == ISD::SDIVREM || + Opcode == X86ISD::SDIVREM8_SEXT_HREG); if (!isSigned) { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; @@ -2355,7 +2613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } } else { - switch (NVT.getSimpleVT().SimpleTy) { + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; @@ -2365,27 +2623,24 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } unsigned LoReg, HiReg, ClrReg; - unsigned ClrOpcode, SExtOpcode; - switch (NVT.getSimpleVT().SimpleTy) { + unsigned SExtOpcode; + switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - ClrOpcode = 0; SExtOpcode = X86::CBW; break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; + ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; SExtOpcode = X86::CDQ; break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -2403,8 +2658,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2424,8 +2678,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode = - SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); + SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0); + switch (NVT.SimpleTy) { + case MVT::i16: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, + CurDAG->getTargetConstant(X86::sub_16bit, dl, + MVT::i32)), + 0); + break; + case MVT::i32: + break; + case MVT::i64: + ClrNode = + SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, + CurDAG->getTargetConstant(X86::sub_32bit, dl, + MVT::i32)), + 0); + break; + default: + llvm_unreachable("Unexpected division source"); + } + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -2435,8 +2712,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2445,28 +2721,44 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0); } - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - if (HiReg == X86::AH && Subtarget->is64Bit() && - !SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - - // If we also need AL (the quotient), get it by extracting a subreg from - // Result. The fast register allocator does not like multiple CopyFromReg - // nodes using aliasing registers. - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), - CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); - - // Shift AX right by 8 bits instead of using AH. - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - ReplaceUses(SDValue(Node, 1), - CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + // Prevent use of AH in a REX instruction by explicitly copying it to + // an ABCD_L register. + // + // The current assumption of the register allocator is that isel + // won't generate explicit references to the GR8_ABCD_H registers. If + // the allocator and/or the backend get enhanced to be more robust in + // that regard, this can be, and should be, removed. + if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { + SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); + unsigned AHExtOpcode = + isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8; + + SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, + MVT::Glue, AHCopy, InFlag); + SDValue Result(RNode, 0); + InFlag = SDValue(RNode, 1); + + if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG || + Opcode == X86ISD::SDIVREM8_SEXT_HREG) { + if (Node->getValueType(1) == MVT::i64) { + // It's not possible to directly movsx AH to a 64bit register, because + // the latter needs the REX prefix, but the former can't have it. + assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG && + "Unexpected i64 sext of h-register"); + Result = + SDValue(CurDAG->getMachineNode( + TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, + CurDAG->getTargetConstant(0, dl, MVT::i64), Result, + CurDAG->getTargetConstant(X86::sub_32bit, dl, + MVT::i32)), + 0); + } + } else { + Result = + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); + } + ReplaceUses(SDValue(Node, 1), Result); + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the division (low) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { @@ -2484,7 +2776,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case X86ISD::CMP: @@ -2496,12 +2788,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to - // use a smaller encoding. if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && HasNoSignedComparisonUses(Node)) - // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); + + // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to + // use a smaller encoding. + // Look past the truncate if CMP is the only use of it. if ((N0.getNode()->getOpcode() == ISD::AND || (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && N0.getNode()->hasOneUse() && @@ -2514,18 +2807,18 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && (!(C->getZExtValue() & 0x80) || HasNoSignedComparisonUses(Node))) { - SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Reg = N0.getNode()->getOperand(0); // On x86-32, only the ABCD registers have 8-bit subregisters. if (!Subtarget->is64Bit()) { const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; default: llvm_unreachable("Unsupported TEST operand type!"); } - SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, Reg.getValueType(), Reg, RC), 0); } @@ -2541,7 +2834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2550,18 +2843,18 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node))) { // Shift the immediate right by 8 bits. SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, - MVT::i8); + dl, MVT::i8); SDValue Reg = N0.getNode()->getOperand(0); // Put the value in an ABCD register. const TargetRegisterClass *TRC; - switch (N0.getValueType().getSimpleVT().SimpleTy) { + switch (N0.getSimpleValueType().SimpleTy) { case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; default: llvm_unreachable("Unsupported TEST operand type!"); } - SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, Reg.getValueType(), Reg, RC), 0); @@ -2578,7 +2871,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2586,7 +2879,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { N0.getValueType() != MVT::i16 && (!(C->getZExtValue() & 0x8000) || HasNoSignedComparisonUses(Node))) { - SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, + MVT::i16); SDValue Reg = N0.getNode()->getOperand(0); // Extract the 16-bit subregister. @@ -2600,7 +2894,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2608,7 +2902,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { N0.getValueType() == MVT::i64 && (!(C->getZExtValue() & 0x80000000) || HasNoSignedComparisonUses(Node))) { - SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, + MVT::i32); SDValue Reg = N0.getNode()->getOperand(0); // Extract the 32-bit subregister. @@ -2622,7 +2917,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } } break; @@ -2649,7 +2944,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue StoredVal = StoreNode->getOperand(1); unsigned Opc = StoredVal->getOpcode(); - LoadSDNode *LoadNode = 0; + LoadSDNode *LoadNode = nullptr; SDValue InputChain; if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, LoadNode, InputChain)) @@ -2667,9 +2962,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, - Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + SDLoc(Node), + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); @@ -2677,91 +2971,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); DEBUG(dbgs() << "=> "; - if (ResNode == NULL || ResNode == Node) + if (ResNode == nullptr || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); @@ -2771,15 +2986,21 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } bool X86DAGToDAGISel:: -SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { SDValue Op0, Op1, Op2, Op3, Op4; - switch (ConstraintCode) { - case 'o': // offsetable ?? - case 'v': // not offsetable ?? - default: return true; - case 'm': // memory - if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4)) + switch (ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + // FIXME: It seems strange that 'i' is needed here since it's supposed to + // be an immediate and not a memory constraint. + // Fallthrough. + case InlineAsm::Constraint_o: // offsetable ?? + case InlineAsm::Constraint_v: // not offsetable ?? + case InlineAsm::Constraint_m: // memory + case InlineAsm::Constraint_X: + if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; }