X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=6e9125d0c7cc524ab678e5c339dd6a1b84f46f69;hb=7bcdaa697859e291893c518cda65b947a43a7205;hp=75ae45ca873c4c6090f16ebf04a2dbe11619606b;hpb=716c5d8a308a2257298f1f227edf7f7ae102cf4f;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 75ae45ca873..6e9125d0c7c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -79,12 +78,16 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt +EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, + cl::desc("Enable fast-math-flags for DAG nodes")); + // Limit the width of DAG chains. This is important in general to prevent -// prevent DAG-based analysis from blowing up. For example, alias analysis and +// DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is -// the safe approach, and will be especially important with global DAGs. +// the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st @@ -143,7 +146,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); @@ -157,7 +160,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // Combine the round and odd parts. Lo = Val; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); @@ -174,7 +177,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -258,8 +261,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getSimpleValueType() && - "Part type doesn't match part!"); + assert(RegisterVT.getSizeInBits() == + Parts[0].getSimpleValueType().getSizeInBits() && + "Part type sizes don't match!"); // Assemble the parts into intermediate operands. SmallVector Ops(NumIntermediates); @@ -358,10 +362,10 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; - assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); if (NumParts == 0) return; @@ -429,7 +433,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); @@ -464,7 +468,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, } } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } @@ -578,93 +582,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, SDLoc dl, - SelectionDAG &DAG, - std::vector &Ops) const; - }; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -860,7 +796,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. assert(DAG.getMachineFunction().getFrameInfo()-> - hasInlineAsmWithSPAdjust()); + hasOpaqueSPAdjustment()); } } } @@ -871,7 +807,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -999,8 +935,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDLocalVariable *Variable = DI->getVariable(); - MDExpression *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); @@ -1024,18 +960,18 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); - SDValue res; + SDValue Result; if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); - res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, res); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); } - return res; + return Result; } /// getValue - Return an SDValue for the given Value. @@ -1071,7 +1007,16 @@ bool SelectionDAGBuilder::findValue(const Value *V) const { SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; - if (N.getNode()) return N; + if (N.getNode()) { + if (isa(N) || isa(N)) { + // Remove the debug location from the node as the node is about to be used + // in a location which may differ from the original debug location. This + // is relevant to Constant and ConstantFP nodes because they can appear + // as constant expressions inside PHI nodes. + N->setDebugLoc(DebugLoc()); + } + return N; + } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); @@ -1501,8 +1446,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) - // = TrueProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice // assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. @@ -1537,8 +1482,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) - // = FalseProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice // assumes that // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. @@ -1826,8 +1771,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue GuardPtr = getValue(IRGuard); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - unsigned Align = - TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -2208,6 +2152,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { bool nuw = false; bool nsw = false; bool exact = false; + FastMathFlags FMF; + if (const OverflowingBinaryOperator *OFBinOp = dyn_cast(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); @@ -2216,9 +2162,22 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); - + if (const FPMathOperator *FPOp = dyn_cast(&I)) + FMF = FPOp->getFastMathFlags(); + + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); + if (EnableFMFInDAG) { + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + } SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, nuw, nsw, exact); + Op1, Op2, &Flags); setValue(&I, BinNodeValue); } @@ -2266,9 +2225,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { dyn_cast(&I)) exact = ExactOp->isExact(); } - + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - nuw, nsw, exact); + &Flags); setValue(&I, Res); } @@ -2276,17 +2238,11 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - // Turn exact SDivs into multiplications. - // FIXME: This should be in DAGCombiner, but it doesn't have access to the - // exact bit. - if (isa(&I) && cast(&I)->isExact() && - !isa(Op1) && - isa(Op2) && !cast(Op2)->isNullValue()) - setValue(&I, DAG.getTargetLoweringInfo() - .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); - else - setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), - Op1, Op2)); + SDNodeFlags Flags; + Flags.setExact(isa(&I) && + cast(&I)->isExact()); + setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, + Op2, &Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -2326,19 +2282,51 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. + if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPF) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + default: break; + } + + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && + // If the underlying comparison instruction is used by any other instruction, + // the consumed instructions won't be destroyed, so it is not profitable + // to convert to a min/max. + cast(&I)->getCondition()->hasOneUse()) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2834,10 +2822,10 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); @@ -2889,7 +2877,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + + // The IR notion of invariant_load only guarantees that all *non-faulting* + // invariant loads result in the same value. The MI notion of invariant load + // guarantees that the load can be legally moved to any location within its + // containing function. The MI notion of invariant_load is stronger than the + // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load + // with a guarantee that the location being loaded from is dereferenceable + // throughout the function's lifetime. + + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && + isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2910,7 +2908,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2925,8 +2923,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector Values(NumValues); - SmallVector Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -2990,8 +2987,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; @@ -3123,7 +3119,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); - Value *MemOpBasePtr = UniformBase ? BasePtr : NULL; + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), @@ -3159,10 +3155,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory( - AliasAnalysis::Location(PtrOperand, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3204,10 +3198,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; - if (UniformBase && AA->pointsToConstantMemory( - AliasAnalysis::Location(BasePtr, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (UniformBase && + AA->pointsToConstantMemory( + MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3215,15 +3208,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : NULL), - MachineMemOperand::MOLoad, VT.getStoreSize(), - Alignment, AAInfo, Ranges); + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); if (!UniformBase) { Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); Index = getValue(Ptr); } - SDValue Ops[] = { Root, Src0, Mask, Base, Index }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); @@ -3981,8 +3973,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( - const Value *V, MDLocalVariable *Variable, MDExpression *Expr, - MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; @@ -4087,16 +4079,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4176,8 +4172,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); if (!Address) { @@ -4258,8 +4254,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4386,11 +4382,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::masked_gather: visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; case Intrinsic::masked_scatter: visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -4782,7 +4780,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::debugtrap: case Intrinsic::trap: { - StringRef TrapFuncName = TM.Options.getTrapFunctionName(); + StringRef TrapFuncName = + I.getAttributes() + .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") + .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; @@ -4933,11 +4934,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameescape: { + case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); @@ -4951,7 +4952,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) + TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } @@ -4959,8 +4960,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); @@ -4972,13 +4973,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); - // Create a TargetExternalSymbol for the label to avoid any target lowering + // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. - StringRef Name = FrameAllocSym->getName(); - assert(Name.data()[Name.size()] == '\0' && "not null terminated"); - SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); + SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); @@ -4996,6 +4995,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(Reg && "cannot get exception code on this platform"); MVT PtrVT = TLI.getPointerTy(); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); @@ -5015,7 +5015,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -5058,7 +5058,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. @@ -5486,7 +5486,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -5669,9 +5669,8 @@ public: /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, - const TargetLowering &TLI, - const DataLayout *DL) const { + EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa(CallOperandVal)) @@ -5697,7 +5696,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -5837,8 +5836,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -5887,8 +5886,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = - OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -5982,8 +5981,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); @@ -6379,7 +6379,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = *TLI.getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), @@ -6414,7 +6414,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { std::pair SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy, + Type *ReturnTy, MachineBasicBlock *LandingPad, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -6435,10 +6435,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); @@ -6580,8 +6579,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, LandingPad, true); SDNode *CallEnd = Result.second.getNode(); @@ -6716,6 +6717,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; SmallVector Offsets; + auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); SmallVector Outs; @@ -6731,8 +6733,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // FIXME: equivalent assert? // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); - uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); - unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); @@ -6795,7 +6797,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6819,7 +6821,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -7028,7 +7030,7 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const DataLayout *DL = TLI->getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { @@ -7064,7 +7066,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7088,7 +7090,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; @@ -7472,7 +7474,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); - JTCases.push_back(JumpTableBlock(JTH, JT)); + JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, JTCases.size() - 1, Weight); @@ -7498,6 +7500,31 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, const int64_t N = Clusters.size(); const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + // Cheap case: the whole range might be suitable for jump table. + CaseCluster JTCluster; + if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { + Clusters[0] = JTCluster; + Clusters.resize(1); + return; + } + } + + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // Split Clusters into minimum number of dense partitions. The algorithm uses // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code // for the Case Statement'" (1994), but builds the MinPartitions array in @@ -7511,16 +7538,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, SmallVector LastElement(N); // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. SmallVector NumTables(N); - // TotalCases[i]: Total nbr of cases in Clusters[0..i]. - SmallVector TotalCases(N); - - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); - TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) - TotalCases[i] += TotalCases[i - 1]; - } // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; @@ -7578,7 +7595,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -7635,7 +7652,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, const int BitWidth = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); - assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); if (Low.isNonNegative() && High.slt(BitWidth)) { // Optimize the case where all the case values fit in a @@ -7663,10 +7680,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, // Update Mask, Bits and ExtraWeight. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); - for (uint64_t j = Lo; j <= Hi; ++j) { - CB->Mask |= 1ULL << j; - CB->Bits++; - } + assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); + CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; + CB->Bits += Hi - Lo + 1; CB->ExtraWeight += Clusters[i].Weight; TotalWeight += Clusters[i].Weight; assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); @@ -7685,9 +7701,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } - BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), - -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI))); + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI)); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); @@ -7709,6 +7725,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); #endif + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PTy = TLI.getPointerTy(); @@ -7781,8 +7801,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { Clusters[DstIndex++] = BitTestCluster; } else { - for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + size_t NumClusters = Last - First + 1; + std::memmove(&Clusters[DstIndex], &Clusters[First], + sizeof(Clusters[0]) * NumClusters); + DstIndex += NumClusters; } } Clusters.resize(DstIndex); @@ -7818,22 +7840,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - + APInt CommonBit = BigValue ^ SmallValue; + if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, - DAG.getConstant(BigValue, DL, VT), - ISD::SETEQ); + SDValue Cond = DAG.getSetCC( + DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), + ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the weights. @@ -7975,6 +7992,18 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } +unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, + CaseClusterIt First, + CaseClusterIt Last) { + return std::count_if(First, Last + 1, [&](const CaseCluster &X) { + if (X.Weight != CC.Weight) + return X.Weight > CC.Weight; + + // Ties are broken by comparing the case value. + return X.Low->getValue().slt(CC.Low->getValue()); + }); +} + void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, @@ -7982,17 +8011,83 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); - unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - assert(NumClusters >= 2 && "Too small to split!"); + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + + for (;;) { + // Our binary search tree differs from a typical BST in that ours can have up + // to three values in each leaf. The pivot selection above doesn't take that + // into account, which means the tree might require more nodes and be less + // efficient. We compensate for this here. + + unsigned NumLeft = LastLeft - W.FirstCluster + 1; + unsigned NumRight = W.LastCluster - FirstRight + 1; + + if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { + // If one side has less than 3 clusters, and the other has more than 3, + // consider taking a cluster from the other side. + + if (NumLeft < NumRight) { + // Consider moving the first cluster on the right to the left side. + CaseCluster &CC = *FirstRight; + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + if (LeftSideRank <= RightSideRank) { + // Moving the cluster to the left does not demote it. + ++LastLeft; + ++FirstRight; + continue; + } + } else { + assert(NumRight < NumLeft); + // Consider moving the last element on the left to the right side. + CaseCluster &CC = *LastLeft; + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + if (RightSideRank <= LeftSideRank) { + // Moving the cluster to the right does not demot it. + --LastLeft; + --FirstRight; + continue; + } + } + } + break; + } + + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); - // FIXME: When we have profile info, we might want to balance the tree based - // on weights instead of node count. + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); - CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2; CaseClusterIt FirstLeft = W.FirstCluster; - CaseClusterIt LastLeft = PivotCluster - 1; - CaseClusterIt FirstRight = PivotCluster; CaseClusterIt LastRight = W.LastCluster; + const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. @@ -8031,7 +8126,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } // Create the CaseBlock record that will be used to lower the branch. - CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB); + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8047,20 +8143,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = 0; // FIXME: Use 1 instead? - if (BPI) { - Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()); - assert(Weight <= UINT32_MAX / SI.getNumSuccessors()); - } + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; - if (TM.getOptLevel() != CodeGenOpt::None) { - // Cluster adjacent cases with the same destination. - sortAndRangeify(Clusters); + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + if (TM.getOptLevel() != CodeGenOpt::None) { // Replace an unreachable default with the most popular destination. // FIXME: Exploit unreachable default more aggressively. bool UnreachableDefault = @@ -8103,11 +8198,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - if (TM.getOptLevel() != CodeGenOpt::None) { - findJumpTables(Clusters, &SI, DefaultMBB); - findBitTestClusters(Clusters, &SI); - } - + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); DEBUG({ dbgs() << "Case clusters: ";