X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=a07a024557cbcef06a07af271845c0da2b80aa42;hp=5c626beff42f362fb389a1aa79ccc0e9f12400c0;hb=bebb0b5a34919d393c1d55d8e76580cbfe8d2895;hpb=fafe7a41a567ad53d95c85e5e4891638a16206a2 diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5c626beff42..a07a024557c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -162,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -209,7 +208,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, TLI.getPointerTy())); + DAG.getTargetConstant(1, DL, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -302,7 +301,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -426,7 +425,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getIntPtrConstant(RoundBits)); + DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) @@ -453,9 +452,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(1)); + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(0)); + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); @@ -494,7 +493,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, + ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), @@ -521,7 +520,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); + PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -551,12 +551,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), + DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -577,93 +577,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const; - }; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -722,7 +654,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. - Parts[i] = DAG.getConstant(0, RegisterVT); + Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } @@ -824,7 +756,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, + unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -844,7 +776,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); @@ -998,8 +930,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDLocalVariable *Variable = DI->getVariable(); - MDExpression *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); @@ -1023,18 +955,18 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); - SDValue res; + SDValue Result; if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); - res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, res); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); } - return res; + return Result; } /// getValue - Return an SDValue for the given Value. @@ -1059,6 +991,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1082,18 +1020,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) - return DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast(C)) - return DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); @@ -1153,9 +1091,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Constants[i] = DAG.getConstant(0, EltVT); + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); @@ -1179,9 +1117,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { SDValue Op; if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Op = DAG.getConstant(0, EltVT); + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } @@ -1238,7 +1176,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i])); + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc())); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -1683,7 +1622,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); @@ -1697,13 +1636,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); + VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } @@ -1718,7 +1657,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // fall through to the lhs instead of the rhs block. if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } @@ -1754,13 +1693,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it @@ -1768,10 +1709,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1779,17 +1720,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), + ISD::SETUGT); - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); // Avoid emitting unnecessary branches to the next block. if (JT.MBB != NextBlock(SwitchBB)) - BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); @@ -1819,6 +1761,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; + SDLoc dl = getCurSDLoc(); // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the // guard value from the virtual register holding the value. Otherwise, emit a @@ -1826,34 +1769,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned GuardReg = SPD.getGuardReg(); if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, PtrTy); else - Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(FI), true, false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(0, VT), ISD::SETNE); + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. - SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); @@ -1881,18 +1824,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1909,26 +1854,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } if (UsePtrType) { VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), - B.Reg, Sub); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1941,9 +1885,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - Reg, VT); + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -1951,24 +1895,23 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, - DAG.getConstant(1, VT), ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), - VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, VT), ISD::SETNE); + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -1976,13 +1919,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); // Avoid emitting unnecessary branches to the next block. if (NextMBB != NextBlock(SwitchBB)) - BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -2055,6 +1998,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { return; SmallVector ValueVTs; + SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); @@ -2063,19 +2007,19 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { SDValue Ops[2]; if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + dl, ValueVTs[0]); } else { - Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + dl, ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } @@ -2084,609 +2028,56 @@ unsigned SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, MachineBasicBlock *LPadBB) { SDValue Chain = getControlRoot(); + SDLoc dl = getCurSDLoc(); // Get the typeid that we will dispatch on later. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); + SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); // Branch to the main landing pad block. MachineBasicBlock *ClauseMBB = FuncInfo.MBB; ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, DAG.getBasicBlock(LPadBB))); return VReg; } -/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for -/// small case ranges). -bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - // Size is the number of Cases represented by this range. - size_t Size = CR.Range.second - CR.Range.first; - if (Size > 3) - return false; - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextMBB = nullptr; - MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != FuncInfo.MF->end()) - NextMBB = BBI; - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // If any two of the cases has the same destination, and if one value - // is the same as the other, but has one bit unset that the other has set, - // use bit manipulation to do two compares at once. For example: - // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" - // TODO: This could be extended to merge any 2 cases in switches with 3 cases. - // TODO: Handle cases where CR.CaseBB != SwitchBB. - if (Size == 2 && CR.CaseBB == SwitchBB) { - Case &Small = *CR.Range.first; - Case &Big = *(CR.Range.second-1); - - if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = Small.Low->getValue(); - const APInt& BigValue = Big.Low->getValue(); - - // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - - SDValue CondLHS = getValue(SV); - EVT VT = CondLHS.getValueType(); - SDLoc DL = getCurSDLoc(); - - SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, - DAG.getConstant(CommonBit, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, - Or, DAG.getConstant(BigValue, VT), - ISD::SETEQ); - - // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchBB, Small.BB, - Small.ExtraWeight + Big.ExtraWeight); - addSuccessorWithWeight(SwitchBB, Default, - // The default destination is the first successor in IR. - BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); - - // Insert the true branch. - SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, - getControlRoot(), Cond, - DAG.getBasicBlock(Small.BB)); - - // Insert the false branch. - BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, - DAG.getBasicBlock(Default)); - - DAG.setRoot(BrCond); - return true; - } - } - } - - // Order cases by weight so the most likely case will be checked first. - uint32_t UnhandledWeights = 0; - if (BPI) { - for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = I->ExtraWeight; - UnhandledWeights += IWeight; - for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = J->ExtraWeight; - if (IWeight > JWeight) - std::swap(*I, *J); - } - } - } - // Rearrange the case blocks so that the last one falls through if possible. - Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) { - // The last case block won't fall through into 'NextMBB' if we emit the - // branches in this order. See if rearranging a case value would help. - // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextMBB) { - std::swap(*I, BackCase); - break; - } - } - - // Create a CaseBlock record representing a conditional branch to - // the Case's target mbb if the value being switched on SV is equal - // to C. - MachineBasicBlock *CurBlock = CR.CaseBB; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - MachineBasicBlock *FallThrough; - if (I != E-1) { - FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); - CurMF->insert(BBI, FallThrough); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } else { - // If the last case doesn't match, go to the default block. - FallThrough = Default; - } - - const Value *RHS, *LHS, *MHS; - ISD::CondCode CC; - if (I->High == I->Low) { - // This is just small small case range :) containing exactly 1 case - CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = nullptr; - } else { - CC = ISD::SETLE; - LHS = I->Low; MHS = SV; RHS = I->High; - } - - // The false weight should be sum of all un-handled cases. - UnhandledWeights -= I->ExtraWeight; - CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, - /* me */ CurBlock, - /* trueweight */ I->ExtraWeight, - /* falseweight */ UnhandledWeights); - - // If emitting the first comparison, just call visitSwitchCase to emit the - // code into the current block. Otherwise, push the CaseBlock onto the - // vector to be later processed by SDISel, and insert the node's MBB - // before the next MBB. - if (CurBlock == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - CurBlock = FallThrough; - } - - return true; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -static APInt ComputeRange(const APInt &First, const APInt &Last) { - uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); - return (LastExt - FirstExt + 1ULL); -} - -/// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, - CaseRecVector &WorkList, - const Value *SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - const APInt &First = FrontCase.Low->getValue(); - const APInt &Last = BackCase.High->getValue(); - - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) - TSize += I->size(); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) - return false; - - APInt Range = ComputeRange(First, Last); - // The density is TSize / Range. Require at least 40%. - // It should not be possible for IntTSize to saturate for sane code, but make - // sure we handle Range saturation correctly. - uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); - uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); - if (IntTSize * 10 < IntRange * 4) - return false; - - DEBUG(dbgs() << "Lowering jump table\n" - << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range << ". Size: " << TSize << ".\n\n"); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Create a new basic block to hold the code for loading the address - // of the jump table, and jumping to it. Update successor information; - // we will either branch to the default case for the switch, or the jump - // table. - MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, JumpTableBB); - - addSuccessorWithWeight(CR.CaseBB, Default); - addSuccessorWithWeight(CR.CaseBB, JumpTableBB); - - // Build a vector of destination BBs, corresponding to each target - // of the jump table. If the value of the jump table slot corresponds to - // a case statement, push the case's BB onto the vector, otherwise, push - // the default BB. - std::vector DestBBs; - APInt TEI = First; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = I->Low->getValue(); - const APInt &High = I->High->getValue(); - - if (Low.sle(TEI) && TEI.sle(High)) { - DestBBs.push_back(I->BB); - if (TEI==High) - ++I; - } else { - DestBBs.push_back(Default); - } - } - - // Calculate weight for each unique destination in CR. - DenseMap DestWeights; - if (FuncInfo.BPI) { - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) - DestWeights[I->BB] += I->ExtraWeight; - } - - // Update successor info. Add one edge to each unique successor. - BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (MachineBasicBlock *DestBB : DestBBs) { - if (!SuccsHandled[DestBB->getNumber()]) { - SuccsHandled[DestBB->getNumber()] = true; - auto I = DestWeights.find(DestBB); - addSuccessorWithWeight(JumpTableBB, DestBB, - I != DestWeights.end() ? I->second : 0); - } - } - - // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); - unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->createJumpTableIndex(DestBBs); - - // Set the jump table information so that we can codegen it as a second - // MachineBasicBlock - JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); - if (CR.CaseBB == SwitchBB) - visitJumpTableHeader(JT, JTH, SwitchBB); - - JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; -} - -/// handleBTSplitSwitchCase - emit comparison and split binary search tree into -/// 2 subtrees. -bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - // Size is the number of Cases represented by this range. - unsigned Size = CR.Range.second - CR.Range.first; - - const APInt &First = FrontCase.Low->getValue(); - const APInt &Last = BackCase.High->getValue(); - double FMetric = 0; - CaseItr Pivot = CR.Range.first + Size/2; - - // Select optimal pivot, maximizing sum density of LHS and RHS. This will - // (heuristically) allow us to emit JumpTable's later. - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); - - APInt LSize = FrontCase.size(); - APInt RSize = TSize-LSize; - DEBUG(dbgs() << "Selecting best pivot: \n" - << "First: " << First << ", Last: " << Last <<'\n' - << "LSize: " << LSize << ", RSize: " << RSize << '\n'); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; - J!=E; ++I, ++J) { - const APInt &LEnd = I->High->getValue(); - const APInt &RBegin = J->Low->getValue(); - APInt Range = ComputeRange(LEnd, RBegin); - assert((Range - 2ULL).isNonNegative() && - "Invalid case distance"); - // Use volatile double here to avoid excess precision issues on some hosts, - // e.g. that use 80-bit X87 registers. - // Only consider the density of sub-ranges that actually have sufficient - // entries to be lowered as a jump table. - volatile double LDensity = - LSize.ult(TLI.getMinimumJumpTableEntries()) - ? 0.0 - : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - volatile double RDensity = - RSize.ult(TLI.getMinimumJumpTableEntries()) - ? 0.0 - : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - volatile double Metric = Range.logBase2() * (LDensity + RDensity); - // Should always split in some non-trivial place - DEBUG(dbgs() <<"=>Step\n" - << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' - << "LDensity: " << LDensity - << ", RDensity: " << RDensity << '\n' - << "Metric: " << Metric << '\n'); - if (FMetric < Metric) { - Pivot = J; - FMetric = Metric; - DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); - } - - LSize += J->size(); - RSize -= J->size(); - } - - if (FMetric == 0 || !areJTsAllowed(TLI)) - Pivot = CR.Range.first + Size/2; - splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB); - return true; -} - -void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, - CaseRecVector &WorkList, - const Value *SV, - MachineBasicBlock *SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - CaseRange LHSR(CR.Range.first, Pivot); - CaseRange RHSR(Pivot, CR.Range.second); - const ConstantInt *C = Pivot->Low; - MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; - - // We know that we branch to the LHS if the Value being switched on is - // less than the Pivot value, C. We use this to optimize our binary - // tree a bit, by recognizing that if SV is greater than or equal to the - // LHS's Case Value, and that Case Value is exactly one less than the - // Pivot's Value, then we can branch directly to the LHS's Target, - // rather than creating a leaf node for it. - if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && - C->getValue() == (CR.GE->getValue() + 1LL)) { - TrueBB = LHSR.first->BB; - } else { - TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, TrueBB); - WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Similar to the optimization above, if the Value being switched on is - // known to be less than the Constant CR.LT, and the current Case Value - // is CR.LT - 1, then we can branch directly to the target block for - // the current Case Value, rather than emitting a RHS leaf node for it. - if ((RHSR.second - RHSR.first) == 1 && CR.LT && - RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) { - FalseBB = RHSR.first->BB; - } else { - FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Create a CaseBlock record representing a conditional branch to - // the LHS node if the value being switched on SV is less than C. - // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); - - if (CR.CaseBB == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); -} - -/// handleBitTestsSwitchCase - if current case range has few destination and -/// range span less, than machine word bitwidth, encode case range into series -/// of masks and emit bit tests with these masks. -bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock* SwitchBB) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); - unsigned IntPtrBits = PTy.getSizeInBits(); - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return false; - - size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - // Single case counts one, case range - two. - numCmps += (I->Low == I->High ? 1 : 2); - } - - // Count unique destinations - SmallSet Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - Dests.insert(I->BB); - if (Dests.size() > 3) - // Don't bother the code below, if there are too much unique destinations - return false; - } - DEBUG(dbgs() << "Total number of unique destinations: " - << Dests.size() << '\n' - << "Total number of comparisons: " << numCmps << '\n'); - - // Compute span of values. - const APInt& minValue = FrontCase.Low->getValue(); - const APInt& maxValue = BackCase.High->getValue(); - APInt cmpRange = maxValue - minValue; - - DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' - << "Low bound: " << minValue << '\n' - << "High bound: " << maxValue << '\n'); - - if (cmpRange.uge(IntPtrBits) || - (!(Dests.size() == 1 && numCmps >= 3) && - !(Dests.size() == 2 && numCmps >= 5) && - !(Dests.size() >= 3 && numCmps >= 6))) - return false; - - DEBUG(dbgs() << "Emitting bit tests\n"); - APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); - - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { - cmpRange = maxValue; - } else { - lowBound = minValue; - } - - CaseBitsVector CasesBits; - unsigned i, count = 0; - - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - MachineBasicBlock* Dest = I->BB; - for (i = 0; i < count; ++i) - if (Dest == CasesBits[i].BB) - break; - - if (i == count) { - assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); - count++; - } - - const APInt& lowValue = I->Low->getValue(); - const APInt& highValue = I->High->getValue(); - - uint64_t lo = (lowValue - lowBound).getZExtValue(); - uint64_t hi = (highValue - lowBound).getZExtValue(); - CasesBits[i].ExtraWeight += I->ExtraWeight; - - for (uint64_t j = lo; j <= hi; j++) { - CasesBits[i].Mask |= 1ULL << j; - CasesBits[i].Bits++; - } - - } - std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); - - BitTestInfo BTC; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - DEBUG(dbgs() << "Cases:\n"); - for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask - << ", Bits: " << CasesBits[i].Bits - << ", BB: " << CasesBits[i].BB << '\n'); - - MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, CaseBB); - BTC.push_back(BitTestCase(CasesBits[i].Mask, - CaseBB, - CasesBits[i].BB, CasesBits[i].ExtraWeight)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, std::move(BTC)); - - if (CR.CaseBB == SwitchBB) - visitBitTestHeader(BTB, SwitchBB); - - BitTestCases.push_back(std::move(BTB)); - - return true; -} - -void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) { - BranchProbabilityInfo *BPI = FuncInfo.BPI; +void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif - // Extract cases from the switch and sort them. - typedef std::pair CasePair; - std::vector Sorted; - Sorted.reserve(SI->getNumCases()); - for (auto I : SI->cases()) - Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex())); - std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) { - return a.first->getValue().slt(b.first->getValue()); + std::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); }); - // Merge adjacent cases with the same destination, build Cases vector. - assert(Cases.empty() && "Cases should be empty before Clusterify;"); - Cases.reserve(SI->getNumCases()); - MachineBasicBlock *PreviousSucc = nullptr; - for (CasePair &CP : Sorted) { - const ConstantInt *CaseVal = CP.first; - unsigned SuccIndex = CP.second; - MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)]; - uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0; - - if (PreviousSucc == Succ && - (CaseVal->getValue() - Cases.back().High->getValue()) == 1) { + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. - Cases.back().High = CaseVal; - Cases.back().ExtraWeight += Weight; + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Weight += CC.Weight; + assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); } else { - Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight)); + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); } - - PreviousSucc = Succ; } - - DEBUG({ - size_t numCmps = 0; - for (auto &I : Cases) - // A range counts double, since it requires two compares. - numCmps += I.Low != I.High ? 2 : 1; - - dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'; - }); + Clusters.resize(DstIndex); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2702,90 +2093,6 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, BitTestCases[i].Parent = Last; } -void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - - // Create a vector of Cases, sorted so that we can efficiently create a binary - // search tree from them. - CaseVector Cases; - Clusterify(Cases, &SI); - - // Get the default destination MBB. - MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - - if (isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && - !Cases.empty()) { - // Replace an unreachable default destination with the most popular case - // destination. - DenseMap Popularity; - unsigned MaxPop = 0; - const BasicBlock *MaxBB = nullptr; - for (auto I : SI.cases()) { - const BasicBlock *BB = I.getCaseSuccessor(); - if (++Popularity[BB] > MaxPop) { - MaxPop = Popularity[BB]; - MaxBB = BB; - } - } - - // Set new default. - assert(MaxPop > 0); - assert(MaxBB); - Default = FuncInfo.MBBMap[MaxBB]; - - // Remove cases that were pointing to the destination that is now the default. - Cases.erase(std::remove_if(Cases.begin(), Cases.end(), - [&](const Case &C) { return C.BB == Default; }), - Cases.end()); - } - - // If there is only the default destination, go there directly. - if (Cases.empty()) { - // Update machine-CFG edges. - SwitchMBB->addSuccessor(Default); - - // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock(SwitchMBB)) { - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, - getControlRoot(), DAG.getBasicBlock(Default))); - } - return; - } - - // Get the Value to be switched on. - const Value *SV = SI.getCondition(); - - // Push the initial CaseRec onto the worklist - CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, - CaseRange(Cases.begin(),Cases.end()))); - - while (!WorkList.empty()) { - // Grab a record representing a case range to process off the worklist - CaseRec CR = WorkList.back(); - WorkList.pop_back(); - - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the range has few cases (two or less) emit a series of specific - // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the switch has more than N blocks, and is at least 40% dense, and the - // target supports indirect branches, then emit a jump table rather than - // lowering the switch to a binary tree of conditional branches. - // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). - if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // Emit binary tree. We need to pick a pivot, and push left and right ranges - // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); - } -} - void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; @@ -2950,19 +2257,51 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. + if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPF) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + default: break; + } + + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && + // If the underlying comparison instruction is used by any other instruction, + // the consumed instructions won't be destroyed, so it is not profitable + // to convert to a min/max. + cast(&I)->getCondition()->hasOneUse()) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2994,10 +2333,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) { void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -3053,19 +2393,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only regcognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) - setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. @@ -3243,10 +2584,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); - else + else { + SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, - DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + } } // Calculate new mask. @@ -3273,6 +2616,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(); + SDLoc dl = getCurSDLoc(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3284,14 +2628,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3383,6 +2727,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Type *Ty = Op0->getType()->getScalarType(); unsigned AS = Ty->getPointerAddressSpace(); SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3392,8 +2737,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - DAG.getConstant(Offset, N.getValueType())); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3408,8 +2753,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, PtrTy); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); + SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -3418,24 +2763,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, - DAG.getConstant(Amt, IdxN.getValueType())); + DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurSDLoc(), + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } @@ -3449,6 +2794,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); @@ -3460,11 +2806,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize, IntPtr)); + DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -3476,18 +2822,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); + DAG.getIntPtrConstant(StackAlign - 1, dl)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3505,7 +2852,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + + // The IR notion of invariant_load only guarantees that all *non-faulting* + // invariant loads result in the same value. The MI notion of invariant load + // guarantees that the load can be legally moved to any location within its + // containing function. The MI notion of invariant_load is stronger than the + // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load + // with a guarantee that the location being loaded from is dereferenceable + // throughout the function's lifetime. + + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && + isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -3535,8 +2892,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + SDLoc dl = getCurSDLoc(); + if (isVolatile) - Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector Values(NumValues); SmallVector Chains(std::min(unsigned(MaxParallelChains), @@ -3552,15 +2911,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), + SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); @@ -3570,7 +2929,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); @@ -3578,7 +2937,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } @@ -3610,6 +2969,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -3618,21 +2978,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurSDLoc(), + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3664,12 +3024,140 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { +// Gather/scatter receive a vector of pointers. +// This vector of pointers may be represented as a base pointer + vector of +// indices, it depends on GEP and instruction preceeding GEP +// that calculates indices +static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + GetElementPtrInst *Gep = dyn_cast(Ptr); + if (!Gep || Gep->getNumOperands() > 2) + return false; + ShuffleVectorInst *ShuffleInst = + dyn_cast(Gep->getPointerOperand()); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; + + Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); + + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Check is the Ptr is inside current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else if (SDB->findValue(ShuffleInst)) { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); + } + else + return false; + + Value *IndexVal = Gep->getOperand(1); + if (SDB->findValue(IndexVal)) { + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + return true; + } + return false; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, + ISD::NON_EXTLOAD); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); +} + +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) - Value *PtrOperand = I.getArgOperand(0); - SDValue Ptr = getValue(PtrOperand); + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); @@ -3683,26 +3171,39 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory( - AliasAnalysis::Location(PtrOperand, - AA->getTypeStoreSize(I.getType()), + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && AA->pointsToConstantMemory( + AliasAnalysis::Location(BasePtr, + AA->getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. - InChain = DAG.getEntryNode(); + Root = DAG.getEntryNode(); + ConstantMemory = true; } MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, VT.getStoreSize(), - Alignment, AAInfo, Ranges); + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD); - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); - setValue(&I, Load); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { @@ -3769,8 +3270,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3859,7 +3360,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3919,9 +3421,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); + DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); + DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } @@ -3934,18 +3436,18 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); + DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); + DAG.getConstant(23, dl, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); + DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } @@ -3961,7 +3463,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy())); + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -3973,12 +3475,12 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); + getF32Constant(DAG, 0x3e814304, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); + getF32Constant(DAG, 0x3f3c50c8, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); + getF32Constant(DAG, 0x3f7f5e7e, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3989,15 +3491,15 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); + getF32Constant(DAG, 0x3da235e3, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); + getF32Constant(DAG, 0x3e65b8f3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); + getF32Constant(DAG, 0x3f324b07, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); + getF32Constant(DAG, 0x3f7ff8fd, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4010,24 +3512,24 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); + getF32Constant(DAG, 0x3924b03e, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); + getF32Constant(DAG, 0x3ab24b87, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); + getF32Constant(DAG, 0x3c1d8c17, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); + getF32Constant(DAG, 0x3d634a1d, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); + getF32Constant(DAG, 0x3e75fe14, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); + getF32Constant(DAG, 0x3f317234, dl)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); + getF32Constant(DAG, 0x3f800000, dl)); } // Add the exponent into the result in integer domain. @@ -4049,7 +3551,7 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b)); + getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4068,7 +3570,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218)); + getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -4084,12 +3586,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456)); + getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1)); + getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); + getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4101,18 +3603,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6)); + getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8)); + getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b)); + getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95)); + getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); + getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4126,24 +3628,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac)); + getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa)); + getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3)); + getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0)); + getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c)); + getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb)); + getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); + getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); @@ -4178,12 +3680,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0)); + getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463)); + getF32Constant(DAG, 0x40019463, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); + getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4195,18 +3697,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e)); + getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b)); + getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923)); + getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f)); + getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); + getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4221,24 +3723,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e)); + getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9)); + getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7)); + getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723)); + getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200)); + getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad)); + getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); + getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); @@ -4259,7 +3761,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a)); + getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -4275,12 +3777,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13)); + getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789)); + getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); + getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4291,15 +3793,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31)); + getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2)); + getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232)); + getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); + getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4312,21 +3814,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce)); + getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a)); + getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798)); + getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192)); + getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c)); + getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); + getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); @@ -4368,7 +3870,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78)); + getF32Constant(DAG, 0x40549a78, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4391,7 +3893,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // powi(x, 0) -> 1.0 if (Val == 0) - return DAG.getConstantFP(1.0, LHS.getValueType()); + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); if (!F->hasFnAttribute(Attribute::OptimizeForSize) || @@ -4420,7 +3922,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } @@ -4451,8 +3953,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( - const Value *V, MDLocalVariable *Variable, MDExpression *Expr, - MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; @@ -4463,8 +3965,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // Ignore inlined function arguments here. // // FIXME: Should we be checking DL->inlinedAt() to determine this? - DIVariable DV(Variable); - if (!DV->getScope()->getSubprogram()->describes(MF.getFunction())) + if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional Op; @@ -4558,16 +4059,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast(cast(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4647,11 +4152,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar = Variable; - if (!Address || !DIVar) { + assert(Variable && "Missing variable"); + if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -4672,9 +4177,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || - isa(Address)); + bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || + isa(Address); const AllocaInst *AI = dyn_cast(Address); @@ -4728,12 +4232,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast(I); - DIVariable DIVar = DI.getVariable(); - if (!DIVar) - return nullptr; + assert(DI.getVariable() && "Missing variable"); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4793,7 +4295,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } @@ -4819,7 +4321,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); + DAG.getConstant(0, sdl, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4858,9 +4360,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -4913,12 +4421,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // We must do this early because v2i32 is not a legal type. SDValue ShOps[2]; ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, MVT::i32), + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; @@ -5060,7 +4568,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, MVT::i32)))); + DAG.getTargetConstant(0, sdl, + MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, @@ -5188,9 +4697,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->isZero()) - Res = DAG.getConstant(-1ULL, Ty); + Res = DAG.getConstant(-1ULL, sdl, Ty); else - Res = DAG.getConstant(0, Ty); + Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; @@ -5460,6 +4969,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_exceptioncode: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "cannot get exception code on this platform"); + MVT PtrVT = TLI.getPointerTy(); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } } } @@ -5472,7 +4993,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -5515,7 +5036,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. @@ -5660,7 +5181,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); - setValue(&I, DAG.getConstant(0, CallVT)); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -5943,7 +5464,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6520,7 +6041,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6548,7 +6069,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32)); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6583,7 +6105,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, - false, 0, DAG, AsmNodeOperands); + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6638,11 +6160,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } } + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), + true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } @@ -6655,7 +6178,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; @@ -6682,6 +6205,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; @@ -6700,7 +6224,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32)); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6728,11 +6254,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - DAG, AsmNodeOperands); + dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -6740,7 +6268,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, - false, 0, DAG, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } @@ -6864,7 +6392,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { std::pair SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy, + Type *ReturnTy, MachineBasicBlock *LandingPad, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -6885,10 +6413,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); @@ -6912,15 +6439,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SmallVectorImpl &Ops, + SDLoc DL, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast(OpVal)) { Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back( @@ -6942,7 +6469,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); - NullPtr = DAG.getIntPtrConstant(0, true); + NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -6960,13 +6487,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // Add the and constants. SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast(IDVal)->getZExtValue(), MVT::i64)); + cast(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast(NBytesVal)->getZExtValue(), MVT::i32)); + cast(NBytesVal)->getZExtValue(), DL, + MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -7005,7 +6533,17 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); - SDValue Callee = getValue(CS->getOperand(2)); // + SDLoc dl = getCurSDLoc(); + SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + + // Handle immediate and symbolic callees. + if (auto* ConstCallee = dyn_cast(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); @@ -7019,8 +6557,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, LandingPad, true); SDNode *CallEnd = Result.second.getNode(); @@ -7040,26 +6580,24 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Add the and constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast(IDVal)->getZExtValue(), MVT::i64)); + cast(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast(NBytesVal)->getZExtValue(), MVT::i32)); + cast(NBytesVal)->getZExtValue(), dl, + MVT::i32)); - // Assume that the Callee is a constant address. - // FIXME: handle function symbols in the future. - Ops.push_back( - DAG.getIntPtrConstant(cast(Callee)->getZExtValue(), - /*isTarget=*/true)); + // Add the callee. + Ops.push_back(Callee); // Adjust to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention - Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. @@ -7072,7 +6610,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -7105,7 +6643,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - getCurSDLoc(), NodeTys, Ops); + dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { @@ -7372,7 +6910,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, - CLI.DAG.getConstant(Offsets[i], PtrVT)); + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, @@ -7817,3 +7356,796 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } +bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, + unsigned *TotalCases, unsigned First, + unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + + APInt LowCase = Clusters[First].Low->getValue(); + APInt HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. + + uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); + uint64_t Range = Diff + 1; + + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + + return NumCases * 100 >= Range * MinJumpTableDensity; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); +} + +bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + uint32_t Weight = 0; + unsigned NumCmps = 0; + std::vector Table; + DenseMap JTWeights; + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Weight += Clusters[I].Weight; + assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + APInt Low = Clusters[I].Low->getValue(); + APInt High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + APInt PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + } + + unsigned NumDests = JTWeights.size(); + if (isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue())) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + Done.insert(Succ); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. + JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.emplace_back(std::move(JTH), std::move(JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Weight); + return true; +} + +void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. + assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI)) + return; + + const int64_t N = Clusters.size(); + const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + + // Split Clusters into minimum number of dense partitions. The algorithm uses + // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code + // for the Case Statement'" (1994), but builds the MinPartitions array in + // reverse order to make it easier to reconstruct the partitions in ascending + // order. In the choice between two optimal partitionings, it picks the one + // which yields more jump tables. + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector LastElement(N); + // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + SmallVector NumTables(N); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + assert(MinJumpTableSize > 1); + NumTables[N - 1] = 0; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; i--) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + NumTables[i] = NumTables[i + 1]; + + // Search for a solution that results in fewer partitions. + for (int64_t j = N - 1; j > i; j--) { + // Try building a partition from Clusters[i..j]. + if (isDense(Clusters, &TotalCases[0], i, j)) { + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + bool IsTable = j - i + 1 >= MinJumpTableSize; + unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + + // If this j leads to fewer partitions, or same number of partitions + // with more lookup tables, it is a better partitioning. + if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + NumTables[i] = Tables; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableSize && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; +} + +bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, + unsigned NumCmps, + const APInt &Low, + const APInt &High) { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might be + // cheaper, and for many destinations, splitting the range might be better. + return (NumDests == 1 && NumCmps >= 3) || + (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); +} + +bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = + DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + assert(Range <= (uint64_t)BitWidth && "Case range must fit in bit mask!"); + + if (Low.isNonNegative() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + uint32_t TotalWeight = 0; + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraWeight. + uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + for (uint64_t j = Lo; j <= Hi; ++j) { + CB->Mask |= 1ULL << j; + CB->Bits++; + } + CB->ExtraWeight += Clusters[i].Weight; + TotalWeight += Clusters[i].Weight; + assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + } + + BitTestInfo BTI; + std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + // Sort by weight first, number of bits second. + if (a.ExtraWeight != b.ExtraWeight) + return a.ExtraWeight > b.ExtraWeight; + return a.Bits > b.Bits; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + } + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI)); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalWeight); + return true; +} + +void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { +// Partition Clusters into as few subsets as possible, where each subset has a +// range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // If target does not have legal shift left, do not emit bit tests at all. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); + if (!TLI.isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue())) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. + bool RangesOnly = true; + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + for (int64_t k = i; k <= j; k++) { + if (Clusters[k].Kind != CC_Range) { + RangesOnly = false; + break; + } + Dests.set(Clusters[k].MBB->getNumber()); + } + if (!RangesOnly || Dests.count() > 3) + break; + + // Check if it's a better partition. + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + if (NumPartitions < MinPartitions[i]) { + // Found a better partition. + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + } + } + } + + // Iterate over the partitions, replacing with bit-test clusters in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(First <= Last); + assert(DstIndex <= First); + + CaseCluster BitTestCluster; + if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { + Clusters[DstIndex++] = BitTestCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *DefaultMBB) { + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *NextMBB = nullptr; + MachineFunction::iterator BBI = W.MBB; + if (++BBI != FuncInfo.MF->end()) + NextMBB = BBI; + + unsigned Size = W.LastCluster - W.FirstCluster + 1; + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + + if (Size == 2 && W.MBB == SwitchMBB) { + // If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 + // cases. + // TODO: Handle cases where W.CaseBB != SwitchBB. + CaseCluster &Small = *W.FirstCluster; + CaseCluster &Big = *W.LastCluster; + + if (Small.Low == Small.High && Big.Low == Big.High && + Small.MBB == Big.MBB) { + const APInt &SmallValue = Small.Low->getValue(); + const APInt &BigValue = Big.Low->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(Cond); + EVT VT = CondLHS.getValueType(); + SDLoc DL = getCurSDLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, DL, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, + DAG.getConstant(BigValue, DL, VT), + ISD::SETEQ); + + // Update successor info. + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight); + addSuccessorWithWeight( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0) + : 0); + + // Insert the true branch. + SDValue BrCond = + DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(Small.MBB)); + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(DefaultMBB)); + + DAG.setRoot(BrCond); + return; + } + } + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Order cases by weight so the most likely case will be checked first. + std::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { + return a.Weight > b.Weight; + }); + + // Rearrange the case blocks so that the last one falls through if possible + // without without changing the order of weights. + for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { + --I; + if (I->Weight > W.LastCluster->Weight) + break; + if (I->Kind == CC_Range && I->MBB == NextMBB) { + std::swap(*I, *W.LastCluster); + break; + } + } + } + + // Compute total weight. + uint32_t UnhandledWeights = 0; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { + UnhandledWeights += I->Weight; + assert(UnhandledWeights >= I->Weight && "Weight overflow!"); + } + + MachineBasicBlock *CurMBB = W.MBB; + for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + MachineBasicBlock *Fallthrough; + if (I == W.LastCluster) { + // For the last cluster, fall through to the default destination. + Fallthrough = DefaultMBB; + } else { + Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); + CurMF->insert(BBI, Fallthrough); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + switch (I->Kind) { + case CC_JumpTable: { + // FIXME: Optimize away range check based on pivot comparisons. + JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; + JumpTable *JT = &JTCases[I->JTCasesIndex].second; + + // The jump block hasn't been inserted yet; insert it here. + MachineBasicBlock *JumpMBB = JT->MBB; + CurMF->insert(BBI, JumpMBB); + addSuccessorWithWeight(CurMBB, Fallthrough); + addSuccessorWithWeight(CurMBB, JumpMBB); + + // The jump table header will be inserted in our current block, do the + // range check, and fall through to our fallthrough block. + JTH->HeaderBB = CurMBB; + JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. + + // If we're in the right place, emit the jump table header right now. + if (CurMBB == SwitchMBB) { + visitJumpTableHeader(*JT, *JTH, SwitchMBB); + JTH->Emitted = true; + } + break; + } + case CC_BitTests: { + // FIXME: Optimize away range check based on pivot comparisons. + BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; + + // The bit test blocks haven't been inserted yet; insert them here. + for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + // If we're in the right place, emit the bit test header header right now. + if (CurMBB ==SwitchMBB) { + visitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + break; + } + case CC_Range: { + const Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->Low == I->High) { + // Check Cond == I->Low. + CC = ISD::SETEQ; + LHS = Cond; + RHS=I->Low; + MHS = nullptr; + } else { + // Check I->Low <= Cond <= I->High. + CC = ISD::SETLE; + LHS = I->Low; + MHS = Cond; + RHS = I->High; + } + + // The false weight is the sum of all unhandled cases. + UnhandledWeights -= I->Weight; + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, + UnhandledWeights); + + if (CurMBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); + + break; + } + } + CurMBB = Fallthrough; + } +} + +void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, + const SwitchWorkListItem &W, + Value *Cond, + MachineBasicBlock *SwitchMBB) { + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); + + CaseClusterIt FirstLeft = W.FirstCluster; + CaseClusterIt LastRight = W.LastCluster; + + const ConstantInt *Pivot = PivotCluster->Low; + + // New blocks will be inserted immediately after the current one. + MachineFunction::iterator BBI = W.MBB; + ++BBI; + + // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, + // we can branch to its destination directly if it's squeezed exactly in + // between the known lower bound and Pivot - 1. + MachineBasicBlock *LeftMBB; + if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && + FirstLeft->Low == W.GE && + (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { + LeftMBB = FirstLeft->MBB; + } else { + LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, LeftMBB); + WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a + // single cluster, RHS.Low == Pivot, and we can branch to its destination + // directly if RHS.High equals the current upper bound. + MachineBasicBlock *RightMBB; + if (FirstRight == LastRight && FirstRight->Kind == CC_Range && + W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { + RightMBB = FirstRight->MBB; + } else { + RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, RightMBB); + WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Create the CaseBlock record that will be used to lower the branch. + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); + + if (W.MBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); +} + +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + // Extract cases from the switch. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto I : SI.cases()) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; + const ConstantInt *CaseVal = I.getCaseValue(); + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + } + + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Replace an unreachable default with the most popular destination. + // FIXME: Exploit unreachable default more aggressively. + bool UnreachableDefault = + isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()); + if (UnreachableDefault && !Clusters.empty()) { + DenseMap Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + // Set new default. + assert(MaxPop > 0 && MaxBB); + DefaultMBB = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the + // default. + CaseClusterVector New; + New.reserve(Clusters.size()); + for (CaseCluster &CC : Clusters) { + if (CC.MBB != DefaultMBB) + New.push_back(CC); + } + Clusters = std::move(New); + } + } + + // If there is only the default destination, jump there directly. + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != NextBlock(SwitchMBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(DefaultMBB))); + } + return; + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); + } + + + DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) dbgs() << "JT:"; + if (C.Kind == CC_BitTests) dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + // For optimized builds, lower large range as a balanced binary tree. + splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + continue; + } + + lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + } +}