X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=4f9656532c275412e0b60cba4005b3b0b14bf147;hp=82f70fe7f07c20af2f2bbdff0e5dfc90531aa96b;hb=1a9ea371223e3811e41b326526e45dbf9f1a7bdc;hpb=29a2d864d43c345e841fcfaa1426a36b3d3e44f6 diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 82f70fe7f07..4f9656532c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -79,7 +80,7 @@ LimitFPPrecision("limit-float-precision", cl::init(0)); static cl::opt -EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -319,9 +320,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -339,11 +338,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -520,9 +516,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && @@ -531,9 +525,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -595,8 +587,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) @@ -1168,6 +1159,95 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + llvm_unreachable("should never codegen catchpads"); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + // Create the terminator node. + SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) { + llvm_unreachable("should never codegen catchendpads"); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchpad, +/// terminatepad, or catchendpad instructions, and finds all the "real" machine +/// basic block destinations. +static void +findUnwindDestinations(FunctionLoweringInfo &FuncInfo, + const BasicBlock *EHPadBB, + SmallVectorImpl &UnwindDests) { + bool IsMSVCCXX = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()) == + EHPersonality::MSVC_CXX; + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + if (isa(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]); + break; + } else if (isa(Pad) || isa(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]); + UnwindDests.back()->setIsEHFuncletEntry(); + break; + } else if (const auto *CPI = dyn_cast(Pad)) { + // Add the catchpad handler to the possible destinations. + UnwindDests.push_back(FuncInfo.MBBMap[CPI->getNormalDest()]); + // In MSVC C++, catchblocks are funclets and need prologues. + if (IsMSVCCXX) + UnwindDests.back()->setIsEHFuncletEntry(); + EHPadBB = CPI->getUnwindDest(); + } else if (const auto *CEPI = dyn_cast(Pad)) { + EHPadBB = CEPI->getUnwindDest(); + } else if (const auto *CEPI = dyn_cast(Pad)) { + EHPadBB = CEPI->getUnwindDest(); + } + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + // FIXME: The weights for catchpads will be wrong. + SmallVector UnwindDests; + findUnwindDestinations(FuncInfo, I.getUnwindDest(), UnwindDests); + for (MachineBasicBlock *UnwindDest : UnwindDests) { + UnwindDest->setIsEHPad(); + addSuccessorWithWeight(FuncInfo.MBB, UnwindDest); + } + + // Create the terminator node. + SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) { + report_fatal_error("visitCleanupEndPad not yet implemented!"); +} + +void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) { + report_fatal_error("visitTerminatePad not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); @@ -1385,13 +1465,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast(Cond)) { + } else { + const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. - llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, @@ -1421,7 +1499,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, + Instruction::BinaryOps Opc, + uint32_t TWeight, uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); @@ -1585,11 +1664,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + Opcode, getEdgeWeight(BrMBB, Succ0MBB), getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always @@ -1797,10 +1877,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -1884,8 +1964,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight); + addSuccessorWithWeight(SwitchBB, MBB, B.Weight); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1958,9 +2038,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like catchpads + // and catchendpads for successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast(Callee); @@ -1975,14 +2056,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -1992,9 +2073,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info + SmallVector UnwindDests; + findUnwindDestinations(FuncInfo, EHPadBB, UnwindDests); + + // Update successor info. + // FIXME: The weights for catchpads will be wrong. addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + for (MachineBasicBlock *UnwindDest : UnwindDests) { + UnwindDest->setIsEHPad(); + addSuccessorWithWeight(InvokeMBB, UnwindDest); + } // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2007,7 +2095,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2050,30 +2138,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadBB) { - SDValue Chain = getControlRoot(); - SDLoc dl = getCurSDLoc(); - - // Get the typeid that we will dispatch on later. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const TargetRegisterClass *RC = - TLI.getRegClassFor(TLI.getPointerTy(DAG.getDataLayout())); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = - DAG.getConstant(TypeID, dl, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); - - // Branch to the main landing pad block. - MachineBasicBlock *ClauseMBB = FuncInfo.MBB; - ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, - DAG.getBasicBlock(LPadBB))); - return VReg; -} - void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { #ifndef NDEBUG for (const CaseCluster &CC : Clusters) @@ -2143,7 +2207,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2284,6 +2349,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), @@ -2308,23 +2377,45 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(const_cast(&I), LHS, RHS); - ISD::NodeType Opc = ISD::DELETED_NODE; - switch (SPF) { - case SPF_UMAX: Opc = ISD::UMAX; break; - case SPF_UMIN: Opc = ISD::UMIN; break; - case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - default: break; - } - EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) VT = TLI.getTypeToTransformTo(Ctx, VT); + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM + : ISD::FMINNAN; + break; + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM + : ISD::FMAXNAN; + break; + } + break; + default: break; + } + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && // If the underlying comparison instruction is used by any other instruction, // the consumed instructions won't be destroyed, so it is not profitable @@ -2787,6 +2878,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? + cast(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2807,12 +2908,20 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); - // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast(Idx)) { + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast(Idx); + if (!CI && isa(Idx) && + cast(Idx)->getSplatValue()) + CI = cast(cast(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2820,6 +2929,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -2921,7 +3035,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2941,8 +3055,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3057,7 +3171,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3081,56 +3195,63 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. -// This vector of pointers may be represented as a base pointer + vector of -// indices, it depends on GEP and instruction preceeding GEP -// that calculates indices +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If first operand of the GEP is a splat vector - we +// extract the spalt value and use it as a uniform base. +// In all other cases the function returns 'false'. +// static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - GetElementPtrInst *Gep = dyn_cast(Ptr); - if (!Gep || Gep->getNumOperands() > 2) + SelectionDAG& DAG = SDB->DAG; + LLVMContext &Context = *DAG.getContext(); + + assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP || GEP->getNumOperands() > 2) return false; - ShuffleVectorInst *ShuffleInst = - dyn_cast(Gep->getPointerOperand()); - if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || - cast(ShuffleInst->getOperand(0))->getOpcode() != - Instruction::InsertElement) + + Value *GEPPtr = GEP->getPointerOperand(); + if (!GEPPtr->getType()->isVectorTy()) + Ptr = GEPPtr; + else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); + Value *IndexVal = GEP->getOperand(1); - SelectionDAG& DAG = SDB->DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Check is the Ptr is inside current basic block - // If not, look for the shuffle instruction - if (SDB->findValue(Ptr)) - Base = SDB->getValue(Ptr); - else if (SDB->findValue(ShuffleInst)) { - SDValue ShuffleNode = SDB->getValue(ShuffleInst); - SDLoc sdl = ShuffleNode; - Base = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDB->setValue(Ptr, Base); - } - else + // The operands of the GEP may be defined in another basic block. + // In this case we'll not find nodes for the operands. + if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; - Value *IndexVal = Gep->getOperand(1); - if (SDB->findValue(IndexVal)) { - Index = SDB->getValue(IndexVal); + Base = SDB->getValue(Ptr); + Index = SDB->getValue(IndexVal); - if (SExtInst* Sext = dyn_cast(IndexVal)) { + // Suppress sign extension. + if (SExtInst* Sext = dyn_cast(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { @@ -3191,7 +3312,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue InChain = DAG.getRoot(); if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3234,8 +3356,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3512,6 +3635,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3610,6 +3735,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3623,6 +3750,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3719,6 +3849,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3814,6 +3947,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3923,6 +4059,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3956,9 +4093,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3966,6 +4103,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -4239,8 +4378,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa(Address); + bool isParameter = Variable->isParameter() || isa(Address); const AllocaInst *AI = dyn_cast(Address); @@ -4257,15 +4395,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N); return nullptr; } - } else if (AI) + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4422,6 +4554,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4615,6 +4752,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4658,6 +4796,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::uabsdiff: + setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::sabsdiff: + setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); @@ -4744,8 +4894,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -5041,7 +5191,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(Reg && "cannot get exception code on this platform"); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); + assert(FuncInfo.MBB->isEHPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); @@ -5054,11 +5204,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); @@ -5068,7 +5218,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5101,14 +5251,20 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + WinEHFuncInfo &EHInfo = + MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction()); + EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5116,7 +5272,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5155,7 +5311,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair Result = lowerInvokable(CLI, LandingPad); + std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5979,7 +6135,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -6038,10 +6194,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6461,12 +6617,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. -std::pair -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - Type *ReturnTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6490,7 +6643,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6594,7 +6747,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , @@ -6631,9 +6784,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, - LandingPad, true); + std::pair Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6878,7 +7030,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else - FrameAlign = getByValTypeAlignment(ElementTy); + FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) @@ -6987,8 +7139,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7149,7 +7302,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI->getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) @@ -7708,12 +7861,22 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; @@ -7756,8 +7919,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalWeight); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); @@ -7950,7 +8114,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } // Compute total weight. - uint32_t UnhandledWeights = 0; + uint32_t DefaultWeight = W.DefaultWeight; + uint32_t UnhandledWeights = DefaultWeight; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { UnhandledWeights += I->Weight; assert(UnhandledWeights >= I->Weight && "Weight overflow!"); @@ -7968,6 +8133,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } + UnhandledWeights -= I->Weight; switch (I->Kind) { case CC_JumpTable: { @@ -7978,8 +8144,26 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + uint32_t JumpWeight = I->Weight; + uint32_t FallthroughWeight = UnhandledWeights; + + // If the default statement is a target of the jump table, we evenly + // distribute the default weight to successors of CurMBB. Also update + // the weight on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpWeight += DefaultWeight / 2; + FallthroughWeight -= DefaultWeight / 2; + JumpMBB->setSuccWeight(SI, DefaultWeight / 2); + break; + } + } + + addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight); + addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. @@ -8005,8 +8189,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultWeight = UnhandledWeights; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the weight on the edge to Fallthrough to two successors + // of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Weight += DefaultWeight / 2; + BTB->DefaultWeight -= DefaultWeight / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8030,7 +8223,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, UnhandledWeights); @@ -8072,8 +8264,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2; + uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2; // Move LastLeft and FirstRight towards each other from opposite directions to // find a partitioning of the clusters which balances the weight on both @@ -8159,7 +8351,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8174,7 +8367,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8275,7 +8469,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back();