X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=4f9656532c275412e0b60cba4005b3b0b14bf147;hp=08bdccdeb079bb68eeb6f73b6da626da65523767;hb=1a9ea371223e3811e41b326526e45dbf9f1a7bdc;hpb=5470fb03387f3ec3365eabac2a4e4f188268ec08 diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 08bdccdeb07..4f9656532c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -79,12 +79,16 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, + cl::desc("Enable fast-math-flags for DAG nodes")); + // Limit the width of DAG chains. This is important in general to prevent -// prevent DAG-based analysis from blowing up. For example, alias analysis and +// DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is -// the safe approach, and will be especially important with global DAGs. +// the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st @@ -143,7 +147,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); @@ -157,13 +161,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // Combine the round and odd parts. Lo = Val; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, - TLI.getPointerTy())); + Hi = + DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } @@ -174,7 +179,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -208,8 +213,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) - return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, DL, TLI.getPointerTy())); + return DAG.getNode( + ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -258,8 +264,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getSimpleValueType() && - "Part type doesn't match part!"); + assert(RegisterVT.getSizeInBits() == + Parts[0].getSimpleValueType().getSizeInBits() && + "Part type sizes don't match!"); // Assemble the parts into intermediate operands. SmallVector Ops(NumIntermediates); @@ -301,8 +308,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Vector/Vector bitcast. @@ -312,9 +320,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -332,11 +338,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -358,10 +361,10 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; - assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); if (NumParts == 0) return; @@ -429,7 +432,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); @@ -464,7 +467,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, } } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } @@ -493,9 +496,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, // undef elements. SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, DL, - TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -513,20 +516,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -550,14 +549,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, - IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), DL, - TLI.getVectorIdxTy())); + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, - DAG.getConstant(i, DL, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Split the intermediate operands into legal parts. @@ -578,93 +577,24 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty) { + ComputeValueVTs(TLI, DL, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, SDLoc dl, - SelectionDAG &DAG, - std::vector &Ops) const; - }; + for (EVT ValueVT : ValueVTs) { + unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -860,7 +790,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. assert(DAG.getMachineFunction().getFrameInfo()-> - hasInlineAsmWithSPAdjust()); + hasOpaqueSPAdjustment()); } } } @@ -871,7 +801,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -999,8 +929,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDLocalVariable *Variable = DI->getVariable(); - MDExpression *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); @@ -1024,18 +954,18 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); - SDValue res; + SDValue Result; if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - Ty); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); - res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, res); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); } - return res; + return Result; } /// getValue - Return an SDValue for the given Value. @@ -1071,7 +1001,16 @@ bool SelectionDAGBuilder::findValue(const Value *V) const { SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; - if (N.getNode()) return N; + if (N.getNode()) { + if (isa(N) || isa(N)) { + // Remove the debug location from the node as the node is about to be used + // in a location which may differ from the original debug location. This + // is relevant to Constant and ConstantFP nodes because they can appear + // as constant expressions inside PHI nodes. + N->setDebugLoc(DebugLoc()); + } + return N; + } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); @@ -1086,7 +1025,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); @@ -1096,7 +1035,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (const ConstantFP *CFP = dyn_cast(C)) @@ -1150,7 +1090,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1182,7 +1122,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = + TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1202,13 +1143,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, + TLI.getPointerTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, + Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1216,8 +1159,98 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + llvm_unreachable("should never codegen catchpads"); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + // Create the terminator node. + SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchEndPad(const CatchEndPadInst &I) { + llvm_unreachable("should never codegen catchendpads"); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchpad, +/// terminatepad, or catchendpad instructions, and finds all the "real" machine +/// basic block destinations. +static void +findUnwindDestinations(FunctionLoweringInfo &FuncInfo, + const BasicBlock *EHPadBB, + SmallVectorImpl &UnwindDests) { + bool IsMSVCCXX = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()) == + EHPersonality::MSVC_CXX; + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + if (isa(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]); + break; + } else if (isa(Pad) || isa(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.push_back(FuncInfo.MBBMap[EHPadBB]); + UnwindDests.back()->setIsEHFuncletEntry(); + break; + } else if (const auto *CPI = dyn_cast(Pad)) { + // Add the catchpad handler to the possible destinations. + UnwindDests.push_back(FuncInfo.MBBMap[CPI->getNormalDest()]); + // In MSVC C++, catchblocks are funclets and need prologues. + if (IsMSVCCXX) + UnwindDests.back()->setIsEHFuncletEntry(); + EHPadBB = CPI->getUnwindDest(); + } else if (const auto *CEPI = dyn_cast(Pad)) { + EHPadBB = CEPI->getUnwindDest(); + } else if (const auto *CEPI = dyn_cast(Pad)) { + EHPadBB = CEPI->getUnwindDest(); + } + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + // FIXME: The weights for catchpads will be wrong. + SmallVector UnwindDests; + findUnwindDestinations(FuncInfo, I.getUnwindDest(), UnwindDests); + for (MachineBasicBlock *UnwindDest : UnwindDests) { + UnwindDest->setIsEHPad(); + addSuccessorWithWeight(FuncInfo.MBB, UnwindDest); + } + + // Create the terminator node. + SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupEndPad(const CleanupEndPadInst &I) { + report_fatal_error("visitCleanupEndPad not yet implemented!"); +} + +void SelectionDAGBuilder::visitTerminatePad(const TerminatePadInst &TPI) { + report_fatal_error("visitTerminatePad not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; @@ -1230,7 +1263,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1238,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); @@ -1258,7 +1291,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1432,13 +1465,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast(Cond)) { + } else { + const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. - llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, @@ -1468,7 +1499,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, + Instruction::BinaryOps Opc, + uint32_t TWeight, uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); @@ -1501,8 +1533,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) - // = TrueProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice // assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. @@ -1537,8 +1569,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // We have flexibility in setting Prob for BB1 and Prob for TmpBB. // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) - // = FalseProb for orignal BB. - // Assuming the orignal weights are A and B, one choice is to set BB1's + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice // assumes that // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. @@ -1632,11 +1664,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + Opcode, getEdgeWeight(BrMBB, Succ0MBB), getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always @@ -1747,7 +1780,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1778,9 +1811,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + unsigned JumpTableReg = + FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1788,11 +1822,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), - ISD::SETUGT); + SDValue CMP = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, @@ -1817,7 +1850,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // First create the loads to the guard/stack slot for the comparison. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1826,8 +1859,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue GuardPtr = getValue(IRGuard); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - unsigned Align = - TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -1845,19 +1877,19 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. @@ -1903,10 +1935,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeCmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1922,7 +1954,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); + VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } @@ -1932,8 +1964,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight); + addSuccessorWithWeight(SwitchBB, MBB, B.Weight); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1964,13 +1996,15 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), + ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, @@ -1979,8 +2013,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, dl, VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); - Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, dl, VT), ISD::SETNE); + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2003,9 +2038,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like catchpads + // and catchendpads for successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast(Callee); @@ -2020,14 +2056,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -2037,9 +2073,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info + SmallVector UnwindDests; + findUnwindDestinations(FuncInfo, EHPadBB, UnwindDests); + + // Update successor info. + // FIXME: The weights for catchpads will be wrong. addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + for (MachineBasicBlock *UnwindDest : UnwindDests) { + UnwindDest->setIsEHPad(); + addSuccessorWithWeight(InvokeMBB, UnwindDest); + } // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2052,7 +2095,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2068,7 +2111,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { SmallVector ValueVTs; SDLoc dl = getCurSDLoc(); - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been @@ -2077,14 +2120,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionPointerVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[0]); } else { - Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionSelectorVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[1]); // Merge into one. @@ -2093,28 +2138,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadBB) { - SDValue Chain = getControlRoot(); - SDLoc dl = getCurSDLoc(); - - // Get the typeid that we will dispatch on later. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); - - // Branch to the main landing pad block. - MachineBasicBlock *ClauseMBB = FuncInfo.MBB; - ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, - DAG.getBasicBlock(LPadBB))); - return VReg; -} - void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { #ifndef NDEBUG for (const CaseCluster &CC : Clusters) @@ -2184,7 +2207,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2208,6 +2232,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { bool nuw = false; bool nsw = false; bool exact = false; + FastMathFlags FMF; + if (const OverflowingBinaryOperator *OFBinOp = dyn_cast(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); @@ -2216,9 +2242,22 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { if (const PossiblyExactOperator *ExactOp = dyn_cast(&I)) exact = ExactOp->isExact(); - + if (const FPMathOperator *FPOp = dyn_cast(&I)) + FMF = FPOp->getFastMathFlags(); + + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); + if (EnableFMFInDAG) { + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + } SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, nuw, nsw, exact); + Op1, Op2, &Flags); setValue(&I, BinNodeValue); } @@ -2226,8 +2265,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = - DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( + Op2.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2266,9 +2305,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { dyn_cast(&I)) exact = ExactOp->isExact(); } - + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - nuw, nsw, exact); + &Flags); setValue(&I, Res); } @@ -2276,17 +2318,11 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - // Turn exact SDivs into multiplications. - // FIXME: This should be in DAGCombiner, but it doesn't have access to the - // exact bit. - if (isa(&I) && cast(&I)->isExact() && - !isa(Op1) && - isa(Op2) && !cast(Op2)->isNullValue()) - setValue(&I, DAG.getTargetLoweringInfo() - .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); - else - setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), - Op1, Op2)); + SDNodeFlags Flags; + Flags.setExact(isa(&I) && + cast(&I)->isExact()); + setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, + Op2, &Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -2299,7 +2335,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2312,33 +2349,93 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; - ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. + if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM + : ISD::FMINNAN; + break; + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? ISD::FMAXNUM + : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && + // If the underlying comparison instruction is used by any other instruction, + // the consumed instructions won't be destroyed, so it is not profitable + // to convert to a min/max. + cast(&I)->getCondition()->hasOneUse()) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2347,7 +2444,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2355,7 +2453,8 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2363,7 +2462,8 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2372,43 +2472,49 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, - DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); + DAG.getTargetConstant( + 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2416,7 +2522,8 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -2424,14 +2531,16 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -2453,7 +2562,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -2468,19 +2577,21 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InVal, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2503,7 +2614,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned MaskNumElts = Mask.size(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2625,7 +2736,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDLoc dl = getCurSDLoc(); Src = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + DAG.getConstant(StartIdx[Input], dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -2652,7 +2764,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI.getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SDLoc dl = getCurSDLoc(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { @@ -2687,9 +2799,9 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -2733,7 +2845,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2766,6 +2878,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? + cast(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2781,16 +2903,25 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast(Ty)->getElementType(); - MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + MVT PtrTy = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); - // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast(Idx)) { + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast(Idx); + if (!CI && isa(Idx) && + cast(Idx)->getSplatValue()) + CI = cast(cast(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2798,6 +2929,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -2834,14 +2970,14 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -2889,7 +3025,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + + // The IR notion of invariant_load only guarantees that all *non-faulting* + // invariant loads result in the same value. The MI notion of invariant load + // guarantees that the load can be legally moved to any location within its + // containing function. The MI notion of invariant_load is stronger than the + // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load + // with a guarantee that the location being loaded from is dereferenceable + // throughout the function's lifetime. + + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2899,7 +3045,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2909,8 +3055,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2925,8 +3071,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector Values(NumValues); - SmallVector Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -2977,8 +3122,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), - ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2990,8 +3135,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; @@ -3027,7 +3171,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3051,55 +3195,63 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. -// This vector of pointers may be represented as a base pointer + vector of -// indices, it depends on GEP and instruction preceeding GEP -// that calculates indices +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If first operand of the GEP is a splat vector - we +// extract the spalt value and use it as a uniform base. +// In all other cases the function returns 'false'. +// static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - GetElementPtrInst *Gep = dyn_cast(Ptr); - if (!Gep || Gep->getNumOperands() > 2) + SelectionDAG& DAG = SDB->DAG; + LLVMContext &Context = *DAG.getContext(); + + assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP || GEP->getNumOperands() > 2) return false; - ShuffleVectorInst *ShuffleInst = - dyn_cast(Gep->getPointerOperand()); - if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || - cast(ShuffleInst->getOperand(0))->getOpcode() != - Instruction::InsertElement) + + Value *GEPPtr = GEP->getPointerOperand(); + if (!GEPPtr->getType()->isVectorTy()) + Ptr = GEPPtr; + else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); + Value *IndexVal = GEP->getOperand(1); - SelectionDAG& DAG = SDB->DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Check is the Ptr is inside current basic block - // If not, look for the shuffle instruction - if (SDB->findValue(Ptr)) - Base = SDB->getValue(Ptr); - else if (SDB->findValue(ShuffleInst)) { - SDValue ShuffleNode = SDB->getValue(ShuffleInst); - SDLoc sdl = ShuffleNode; - Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); - SDB->setValue(Ptr, Base); - } - else + // The operands of the GEP may be defined in another basic block. + // In this case we'll not find nodes for the operands. + if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; - Value *IndexVal = Gep->getOperand(1); - if (SDB->findValue(IndexVal)) { - Index = SDB->getValue(IndexVal); + Base = SDB->getValue(Ptr); + Index = SDB->getValue(IndexVal); - if (SExtInst* Sext = dyn_cast(IndexVal)) { + // Suppress sign extension. + if (SExtInst* Sext = dyn_cast(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { @@ -3129,7 +3281,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; @@ -3149,7 +3301,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3159,10 +3311,9 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory( - AliasAnalysis::Location(PtrOperand, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3189,7 +3340,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3204,10 +3355,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; - if (UniformBase && AA->pointsToConstantMemory( - AliasAnalysis::Location(BasePtr, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (UniformBase && + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3220,7 +3371,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { Root, Src0, Mask, Base, Index }; @@ -3297,8 +3448,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, + TLI.getPointerTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + TLI.getPointerTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3310,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3345,7 +3498,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + EVT VT = + TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); @@ -3388,7 +3542,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3397,7 +3551,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3431,7 +3585,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast(I.getType())) { - EVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -3464,8 +3618,9 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); - SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, dl, TLI.getPointerTy())); + SDValue t1 = DAG.getNode( + ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); @@ -3480,6 +3635,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3490,7 +3647,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -3577,6 +3735,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3590,6 +3750,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3686,6 +3849,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3781,6 +3947,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3890,6 +4059,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3923,9 +4093,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3933,6 +4103,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -3980,8 +4152,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( - const Value *V, MDLocalVariable *Variable, MDExpression *Expr, - MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; @@ -4077,25 +4249,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); - EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4175,8 +4353,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); if (!Address) { @@ -4200,8 +4378,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa(Address); + bool isParameter = Variable->isParameter() || isa(Address); const AllocaInst *AI = dyn_cast(Address); @@ -4218,15 +4395,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N); return nullptr; } - } else if (AI) + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4257,8 +4428,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4337,14 +4508,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, sdl, TLI.getPointerTy())); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4382,6 +4554,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4446,7 +4623,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, sdl, MVT::i32), @@ -4476,7 +4653,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -4566,7 +4743,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, @@ -4575,6 +4752,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4595,10 +4773,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, - DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), - DAG.getNode(ISD::BITCAST, sdl, MVT::f16, - getValue(I.getArgOperand(0))))); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); @@ -4618,6 +4796,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::uabsdiff: + setValue(&I, DAG.getNode(ISD::UABSDIFF, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::sabsdiff: + setValue(&I, DAG.getNode(ISD::SABSDIFF, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); @@ -4642,8 +4832,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); + Res = DAG.getNode( + ISD::STACKSAVE, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; @@ -4657,7 +4848,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); const Value *Ptr = cast(I.getArgOperand(0))->getPointerOperand(); const GlobalVariable *GV = dyn_cast(Ptr); @@ -4703,8 +4894,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -4755,7 +4946,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI.getPointerTy(), + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } @@ -4783,7 +4974,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::debugtrap: case Intrinsic::trap: { - StringRef TrapFuncName = TM.Options.getTrapFunctionName(); + StringRef TrapFuncName = + I.getAttributes() + .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") + .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; @@ -4793,10 +4987,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()) - .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -4872,7 +5067,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = + DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -4882,7 +5078,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::invariant_end: // Discard region information. @@ -4902,7 +5098,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::donothing: // ignore @@ -4934,11 +5130,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameescape: { + case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); @@ -4952,7 +5148,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) + TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } @@ -4960,10 +5156,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); - MVT PtrVT = TLI.getPointerTy(0); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast(I.getArgOperand(0)->stripPointerCasts()); @@ -4973,13 +5169,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); - // Create a TargetExternalSymbol for the label to avoid any target lowering + // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. - StringRef Name = FrameAllocSym->getName(); - assert(Name.data()[Name.size()] == '\0' && "not null terminated"); - SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); + SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); @@ -4995,8 +5189,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_exceptioncode: { unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "cannot get exception code on this platform"); - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + assert(FuncInfo.MBB->isEHPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); @@ -5009,21 +5204,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5056,14 +5251,20 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + WinEHFuncInfo &EHInfo = + MMI.getWinEHFuncInfo(DAG.getMachineFunction().getFunction()); + EHInfo.addIPToStateRange(EHPadBB, BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5071,7 +5272,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5110,7 +5311,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair Result = lowerInvokable(CLI, LandingPad); + std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5178,7 +5379,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5203,7 +5405,8 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -5487,7 +5690,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -5640,8 +5843,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, - DAG.getTargetLoweringInfo().getPointerTy()); + Callee = DAG.getExternalSymbol( + RenameFn, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5670,13 +5874,12 @@ public: /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, - const TargetLowering &TLI, - const DataLayout *DL) const { + EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa(CallOperandVal)) - return TLI.getPointerTy(); + return TLI.getPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); @@ -5698,7 +5901,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -5712,7 +5915,7 @@ public: } } - return TLI.getValueType(OpTy, true); + return TLI.getValueType(DL, OpTy, true); } }; @@ -5838,8 +6041,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -5864,10 +6067,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), + STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; break; @@ -5888,8 +6092,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = - OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -5931,7 +6135,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -5977,21 +6181,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool(cast(OpVal), - TLI.getPointerTy()); + OpInfo.CallOperand = DAG.getConstantPool( + cast(OpVal), TLI.getPointerTy(DAG.getDataLayout())); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6022,9 +6228,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // AsmNodeOperands - The operands for the ISD::INLINEASM node. std::vector AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain - AsmNodeOperands.push_back( - DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( + IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6064,8 +6269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6201,8 +6406,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6227,16 +6432,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); unsigned ConstraintID = @@ -6314,7 +6519,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6380,9 +6585,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), - getRoot(), getValue(I.getOperand(0)), + const DataLayout &DL = DAG.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), + getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); setValue(&I, V); @@ -6412,12 +6617,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. -std::pair -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - bool UseVoidTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6436,13 +6638,12 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6474,8 +6675,8 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + Ops.push_back(Builder.DAG.getTargetFrameIndex( + FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -6546,7 +6747,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , @@ -6581,9 +6782,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; - std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, - LandingPad, true); + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); + std::pair Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6653,7 +6855,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; - ComputeValueVTs(TLI, CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -6717,10 +6919,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Type *OrigRetTy = CLI.RetTy; SmallVector RetTys; SmallVector Offsets; - ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + auto &DL = CLI.DAG.getDataLayout(); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); SmallVector Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), @@ -6732,13 +6935,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // FIXME: equivalent assert? // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); - uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); - unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; @@ -6783,7 +6986,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; - ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); Type *FinalType = Args[i].Ty; if (Args[i].isByVal) FinalType = cast(Args[i].Ty)->getElementType(); @@ -6796,7 +6999,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6820,14 +7023,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else - FrameAlign = getByValTypeAlignment(ElementTy); + FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) @@ -6922,7 +7125,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector PVTs; Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); - ComputeValueVTs(*this, PtrRetTy, PVTs); + ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; @@ -6936,8 +7139,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -6996,7 +7200,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType()); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -7029,13 +7234,14 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const DataLayout *DL = TLI->getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -7052,7 +7258,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; Type *FinalType = I->getType(); @@ -7065,7 +7271,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7089,14 +7295,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI->getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) @@ -7152,7 +7358,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -7176,7 +7383,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector ArgValues; SmallVector ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate @@ -7323,7 +7530,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -7473,7 +7680,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); - JTCases.push_back(JumpTableBlock(JTH, JT)); + JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, JTCases.size() - 1, Weight); @@ -7499,6 +7706,31 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, const int64_t N = Clusters.size(); const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + // Cheap case: the whole range might be suitable for jump table. + CaseCluster JTCluster; + if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { + Clusters[0] = JTCluster; + Clusters.resize(1); + return; + } + } + + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // Split Clusters into minimum number of dense partitions. The algorithm uses // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code // for the Case Statement'" (1994), but builds the MinPartitions array in @@ -7512,16 +7744,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, SmallVector LastElement(N); // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. SmallVector NumTables(N); - // TotalCases[i]: Total nbr of cases in Clusters[0..i]. - SmallVector TotalCases(N); - - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); - TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) - TotalCases[i] += TotalCases[i - 1]; - } // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; @@ -7579,7 +7801,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -7634,16 +7856,27 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt LowBound; APInt CmpRange; - const int BitWidth = - DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); - assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + const int BitWidth = DAG.getTargetLoweringInfo() + .getPointerTy(DAG.getDataLayout()) + .getSizeInBits(); + assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; @@ -7664,10 +7897,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, // Update Mask, Bits and ExtraWeight. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); - for (uint64_t j = Lo; j <= Hi; ++j) { - CB->Mask |= 1ULL << j; - CB->Bits++; - } + assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); + CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; + CB->Bits += Hi - Lo + 1; CB->ExtraWeight += Clusters[i].Weight; TotalWeight += Clusters[i].Weight; assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); @@ -7686,9 +7918,10 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } - BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), - -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI))); + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalWeight); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); @@ -7710,9 +7943,13 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); #endif + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -7782,8 +8019,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { Clusters[DstIndex++] = BitTestCluster; } else { - for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + size_t NumClusters = Last - First + 1; + std::memmove(&Clusters[DstIndex], &Clusters[First], + sizeof(Clusters[0]) * NumClusters); + DstIndex += NumClusters; } } Clusters.resize(DstIndex); @@ -7819,22 +8058,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - + APInt CommonBit = BigValue ^ SmallValue; + if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, - DAG.getConstant(BigValue, DL, VT), - ISD::SETEQ); + SDValue Cond = DAG.getSetCC( + DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), + ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the weights. @@ -7880,7 +8114,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } // Compute total weight. - uint32_t UnhandledWeights = 0; + uint32_t DefaultWeight = W.DefaultWeight; + uint32_t UnhandledWeights = DefaultWeight; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { UnhandledWeights += I->Weight; assert(UnhandledWeights >= I->Weight && "Weight overflow!"); @@ -7898,6 +8133,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } + UnhandledWeights -= I->Weight; switch (I->Kind) { case CC_JumpTable: { @@ -7908,8 +8144,26 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + uint32_t JumpWeight = I->Weight; + uint32_t FallthroughWeight = UnhandledWeights; + + // If the default statement is a target of the jump table, we evenly + // distribute the default weight to successors of CurMBB. Also update + // the weight on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpWeight += DefaultWeight / 2; + FallthroughWeight -= DefaultWeight / 2; + JumpMBB->setSuccWeight(SI, DefaultWeight / 2); + break; + } + } + + addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight); + addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. @@ -7935,8 +8189,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultWeight = UnhandledWeights; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the weight on the edge to Fallthrough to two successors + // of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Weight += DefaultWeight / 2; + BTB->DefaultWeight -= DefaultWeight / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -7960,7 +8223,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, UnhandledWeights); @@ -7976,6 +8238,18 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } +unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, + CaseClusterIt First, + CaseClusterIt Last) { + return std::count_if(First, Last + 1, [&](const CaseCluster &X) { + if (X.Weight != CC.Weight) + return X.Weight > CC.Weight; + + // Ties are broken by comparing the case value. + return X.Low->getValue().slt(CC.Low->getValue()); + }); +} + void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, @@ -7983,17 +8257,83 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); - unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - assert(NumClusters >= 2 && "Too small to split!"); + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2; + uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + + for (;;) { + // Our binary search tree differs from a typical BST in that ours can have up + // to three values in each leaf. The pivot selection above doesn't take that + // into account, which means the tree might require more nodes and be less + // efficient. We compensate for this here. + + unsigned NumLeft = LastLeft - W.FirstCluster + 1; + unsigned NumRight = W.LastCluster - FirstRight + 1; + + if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { + // If one side has less than 3 clusters, and the other has more than 3, + // consider taking a cluster from the other side. + + if (NumLeft < NumRight) { + // Consider moving the first cluster on the right to the left side. + CaseCluster &CC = *FirstRight; + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + if (LeftSideRank <= RightSideRank) { + // Moving the cluster to the left does not demote it. + ++LastLeft; + ++FirstRight; + continue; + } + } else { + assert(NumRight < NumLeft); + // Consider moving the last element on the left to the right side. + CaseCluster &CC = *LastLeft; + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + if (RightSideRank <= LeftSideRank) { + // Moving the cluster to the right does not demot it. + --LastLeft; + --FirstRight; + continue; + } + } + } + break; + } + + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); - // FIXME: When we have profile info, we might want to balance the tree based - // on weights instead of node count. + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); - CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2; CaseClusterIt FirstLeft = W.FirstCluster; - CaseClusterIt LastLeft = PivotCluster - 1; - CaseClusterIt FirstRight = PivotCluster; CaseClusterIt LastRight = W.LastCluster; + const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. @@ -8011,7 +8351,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8026,13 +8367,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. - CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB); + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8048,20 +8391,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = 0; // FIXME: Use 1 instead? - if (BPI) { - Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()); - assert(Weight <= UINT32_MAX / SI.getNumSuccessors()); - } + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; - if (TM.getOptLevel() != CodeGenOpt::None) { - // Cluster adjacent cases with the same destination. - sortAndRangeify(Clusters); + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + if (TM.getOptLevel() != CodeGenOpt::None) { // Replace an unreachable default with the most popular destination. // FIXME: Exploit unreachable default more aggressively. bool UnreachableDefault = @@ -8104,11 +8446,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - if (TM.getOptLevel() != CodeGenOpt::None) { - findJumpTables(Clusters, &SI, DefaultMBB); - findBitTestClusters(Clusters, &SI); - } - + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); DEBUG({ dbgs() << "Case clusters: "; @@ -8130,7 +8469,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back();