X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=1c14d4d8d5f83dd8519ca310c39552cef3f31182;hb=49baa9f89617593975fffd665b8c0b606f874aa0;hp=14421834a553a186f131779bb88bc0b2c0dfd026;hpb=defaf830f9a803fcc44e85c90b26542e38546a03;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 14421834a55..1c14d4d8d5f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -162,7 +162,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -209,7 +209,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, TLI.getPointerTy())); + DAG.getTargetConstant(1, DL, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -302,7 +302,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -426,7 +426,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getIntPtrConstant(RoundBits)); + DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) @@ -453,9 +453,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(1)); + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(0)); + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); @@ -494,7 +494,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, + ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), @@ -521,7 +521,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); + PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -551,12 +552,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), + DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -660,7 +661,7 @@ namespace { /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, + bool HasMatching, unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector &Ops) const; }; @@ -722,7 +723,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. - Parts[i] = DAG.getConstant(0, RegisterVT); + Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } @@ -824,7 +825,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, + unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -844,7 +845,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); @@ -998,8 +999,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDLocalVariable *Variable = DI->getVariable(); - MDExpression *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); @@ -1059,6 +1060,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1082,18 +1089,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) - return DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast(C)) - return DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); @@ -1153,9 +1160,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Constants[i] = DAG.getConstant(0, EltVT); + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); @@ -1179,9 +1186,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { SDValue Op; if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Op = DAG.getConstant(0, EltVT); + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } @@ -1238,7 +1245,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i])); + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc())); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -1683,7 +1691,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); @@ -1697,13 +1705,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); + VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } @@ -1718,7 +1726,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // fall through to the lhs instead of the rhs block. if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } @@ -1754,13 +1762,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it @@ -1768,10 +1778,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1779,17 +1789,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), + ISD::SETUGT); - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); // Avoid emitting unnecessary branches to the next block. if (JT.MBB != NextBlock(SwitchBB)) - BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); @@ -1819,6 +1830,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; + SDLoc dl = getCurSDLoc(); // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the // guard value from the virtual register holding the value. Otherwise, emit a @@ -1826,34 +1838,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned GuardReg = SPD.getGuardReg(); if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, PtrTy); else - Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(FI), true, false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(0, VT), ISD::SETNE); + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. - SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); @@ -1881,18 +1893,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1909,26 +1923,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } if (UsePtrType) { VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), - B.Reg, Sub); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrRange, + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1941,9 +1954,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - Reg, VT); + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -1951,24 +1964,23 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, - DAG.getConstant(1, VT), ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), - VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, VT), ISD::SETNE); + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -1976,13 +1988,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); // Avoid emitting unnecessary branches to the next block. if (NextMBB != NextBlock(SwitchBB)) - BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -2055,6 +2067,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { return; SmallVector ValueVTs; + SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); @@ -2063,19 +2076,19 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { SDValue Ops[2]; if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + dl, ValueVTs[0]); } else { - Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + dl, ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } @@ -2084,19 +2097,20 @@ unsigned SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, MachineBasicBlock *LPadBB) { SDValue Chain = getControlRoot(); + SDLoc dl = getCurSDLoc(); // Get the typeid that we will dispatch on later. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); + SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); // Branch to the main landing pad block. MachineBasicBlock *ClauseMBB = FuncInfo.MBB; ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, DAG.getBasicBlock(LPadBB))); return VReg; } @@ -2126,6 +2140,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { // the previous cluster. Clusters[DstIndex - 1].High = CaseVal; Clusters[DstIndex - 1].Weight += CC.Weight; + assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); @@ -2355,10 +2370,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) { void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -2414,19 +2430,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only regcognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) - setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. @@ -2604,10 +2621,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); - else + else { + SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, - DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + } } // Calculate new mask. @@ -2634,6 +2653,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(); + SDLoc dl = getCurSDLoc(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -2645,14 +2665,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -2744,6 +2764,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Type *Ty = Op0->getType()->getScalarType(); unsigned AS = Ty->getPointerAddressSpace(); SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -2753,8 +2774,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - DAG.getConstant(Offset, N.getValueType())); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -2769,8 +2790,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, PtrTy); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); + SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2779,24 +2800,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, - DAG.getConstant(Amt, IdxN.getValueType())); + DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurSDLoc(), + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } @@ -2810,6 +2831,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); @@ -2821,11 +2843,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize, IntPtr)); + DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2837,18 +2859,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); + DAG.getIntPtrConstant(StackAlign - 1, dl)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -2896,8 +2919,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + SDLoc dl = getCurSDLoc(); + if (isVolatile) - Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector Values(NumValues); SmallVector Chains(std::min(unsigned(MaxParallelChains), @@ -2913,15 +2938,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), + SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); @@ -2931,7 +2956,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); @@ -2939,7 +2964,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } @@ -2971,6 +2996,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -2979,21 +3005,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurSDLoc(), + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3025,6 +3051,94 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } +// Gather/scatter receive a vector of pointers. +// This vector of pointers may be represented as a base pointer + vector of +// indices, it depends on GEP and instruction preceeding GEP +// that calculates indices +static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + GetElementPtrInst *Gep = dyn_cast(Ptr); + if (!Gep || Gep->getNumOperands() > 2) + return false; + ShuffleVectorInst *ShuffleInst = + dyn_cast(Gep->getPointerOperand()); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; + + Ptr = cast(ShuffleInst->getOperand(0))->getOperand(1); + + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Check is the Ptr is inside current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else if (SDB->findValue(ShuffleInst)) { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); + } + else + return false; + + Value *IndexVal = Gep->getOperand(1); + if (SDB->findValue(IndexVal)) { + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + return true; + } + return false; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDLoc sdl = getCurSDLoc(); @@ -3066,6 +3180,59 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { setValue(&I, Load); } +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && AA->pointsToConstantMemory( + AliasAnalysis::Location(BasePtr, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -3130,8 +3297,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3220,7 +3387,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3280,9 +3448,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); + DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); + DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } @@ -3295,18 +3463,18 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); + DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); + DAG.getConstant(23, dl, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); + DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } @@ -3322,7 +3490,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy())); + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -3334,12 +3502,12 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // // error 0.0144103317, which is 6 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); + getF32Constant(DAG, 0x3e814304, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); + getF32Constant(DAG, 0x3f3c50c8, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); + getF32Constant(DAG, 0x3f7f5e7e, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3350,15 +3518,15 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // // error 0.000107046256, which is 13 to 14 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); + getF32Constant(DAG, 0x3da235e3, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); + getF32Constant(DAG, 0x3e65b8f3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); + getF32Constant(DAG, 0x3f324b07, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); + getF32Constant(DAG, 0x3f7ff8fd, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3371,24 +3539,24 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; // error 2.47208000*10^(-7), which is better than 18 bits SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); + getF32Constant(DAG, 0x3924b03e, dl)); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); + getF32Constant(DAG, 0x3ab24b87, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); + getF32Constant(DAG, 0x3c1d8c17, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); + getF32Constant(DAG, 0x3d634a1d, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); + getF32Constant(DAG, 0x3e75fe14, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); + getF32Constant(DAG, 0x3f317234, dl)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); + getF32Constant(DAG, 0x3f800000, dl)); } // Add the exponent into the result in integer domain. @@ -3410,7 +3578,7 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b)); + getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -3429,7 +3597,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218)); + getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -3445,12 +3613,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456)); + getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1)); + getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); + getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3462,18 +3630,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6)); + getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8)); + getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b)); + getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95)); + getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); + getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3487,24 +3655,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac)); + getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa)); + getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3)); + getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0)); + getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c)); + getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb)); + getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); + getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); @@ -3539,12 +3707,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0)); + getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463)); + getF32Constant(DAG, 0x40019463, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); + getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3556,18 +3724,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e)); + getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b)); + getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923)); + getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f)); + getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); + getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3582,24 +3750,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e)); + getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9)); + getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7)); + getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723)); + getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200)); + getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad)); + getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); + getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); @@ -3620,7 +3788,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a)); + getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -3636,12 +3804,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13)); + getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789)); + getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); + getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3652,15 +3820,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31)); + getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2)); + getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232)); + getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); + getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3673,21 +3841,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce)); + getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a)); + getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798)); + getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192)); + getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c)); + getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); + getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); @@ -3729,7 +3897,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // #define LOG2OF10 3.3219281f // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78)); + getF32Constant(DAG, 0x40549a78, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -3752,7 +3920,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // powi(x, 0) -> 1.0 if (Val == 0) - return DAG.getConstantFP(1.0, LHS.getValueType()); + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); if (!F->hasFnAttribute(Attribute::OptimizeForSize) || @@ -3781,7 +3949,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } @@ -3812,8 +3980,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( - const Value *V, MDLocalVariable *Variable, MDExpression *Expr, - MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; @@ -4007,8 +4175,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); if (!Address) { @@ -4089,8 +4257,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgValueInst &DI = cast(I); assert(DI.getVariable() && "Missing variable"); - MDLocalVariable *Variable = DI.getVariable(); - MDExpression *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4150,7 +4318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } @@ -4176,7 +4344,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); + DAG.getConstant(0, sdl, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4215,9 +4383,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -4270,12 +4444,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // We must do this early because v2i32 is not a legal type. SDValue ShOps[2]; ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, MVT::i32), + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; @@ -4417,7 +4591,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, MVT::i32)))); + DAG.getTargetConstant(0, sdl, + MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, @@ -4545,9 +4720,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->isZero()) - Res = DAG.getConstant(-1ULL, Ty); + Res = DAG.getConstant(-1ULL, sdl, Ty); else - Res = DAG.getConstant(0, Ty); + Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; @@ -4817,6 +4992,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_exceptioncode: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "cannot get exception code on this platform"); + MVT PtrVT = TLI.getPointerTy(); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } } } @@ -5017,7 +5204,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); - setValue(&I, DAG.getConstant(0, CallVT)); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -5877,7 +6064,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -5905,7 +6092,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32)); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -5940,7 +6128,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, - false, 0, DAG, AsmNodeOperands); + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -5995,11 +6183,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } } + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), + true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } @@ -6012,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; @@ -6039,6 +6228,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; @@ -6057,7 +6247,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32)); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6085,11 +6277,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - DAG, AsmNodeOperands); + dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -6097,7 +6291,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, - false, 0, DAG, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } @@ -6269,15 +6463,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SmallVectorImpl &Ops, + SDLoc DL, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast(OpVal)) { Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back( @@ -6299,7 +6493,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); - NullPtr = DAG.getIntPtrConstant(0, true); + NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -6317,13 +6511,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // Add the and constants. SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast(IDVal)->getZExtValue(), MVT::i64)); + cast(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast(NBytesVal)->getZExtValue(), MVT::i32)); + cast(NBytesVal)->getZExtValue(), DL, + MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -6362,11 +6557,12 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); + SDLoc dl = getCurSDLoc(); SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. if (auto* ConstCallee = dyn_cast(Callee)) - Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, /*isTarget=*/true); else if (auto* SymbolicCallee = dyn_cast(Callee)) Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), @@ -6406,10 +6602,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Add the and constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast(IDVal)->getZExtValue(), MVT::i64)); + cast(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast(NBytesVal)->getZExtValue(), MVT::i32)); + cast(NBytesVal)->getZExtValue(), dl, + MVT::i32)); // Add the callee. Ops.push_back(Callee); @@ -6419,10 +6616,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention - Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. @@ -6435,7 +6632,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -6468,7 +6665,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - getCurSDLoc(), NodeTys, Ops); + dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { @@ -6735,7 +6932,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, - CLI.DAG.getConstant(Offsets[i], PtrVT)); + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, @@ -7218,13 +7416,14 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, CaseCluster &JTCluster) { assert(First <= Last); - uint64_t Weight = 0; + uint32_t Weight = 0; unsigned NumCmps = 0; std::vector Table; DenseMap JTWeights; for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Weight += Clusters[I].Weight; + assert(Weight >= Clusters[I].Weight && "Weight overflow!"); APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; @@ -7236,7 +7435,8 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (uint64_t J = 0; J < Gap; J++) Table.push_back(DefaultMBB); } - for (APInt X = Low; X.sle(High); ++X) + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); JTWeights[Clusters[I].MBB] += Clusters[I].Weight; } @@ -7450,7 +7650,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } CaseBitsVector CBV; - uint64_t TotalWeight = 0; + uint32_t TotalWeight = 0; for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. unsigned j; @@ -7470,11 +7670,14 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } CB->ExtraWeight += Clusters[i].Weight; TotalWeight += Clusters[i].Weight; + assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); } BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // FIXME: Sort by weight. + // Sort by weight first, number of bits second. + if (a.ExtraWeight != b.ExtraWeight) + return a.ExtraWeight > b.ExtraWeight; return a.Bits > b.Bits; }); @@ -7628,9 +7831,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, - DAG.getConstant(CommonBit, VT)); + DAG.getConstant(CommonBit, DL, VT)); SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, - DAG.getConstant(BigValue, VT), ISD::SETEQ); + DAG.getConstant(BigValue, DL, VT), + ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the weights. @@ -7662,11 +7866,12 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, return a.Weight > b.Weight; }); - // Rearrange the case blocks so that the last one falls through if possible. - // Start at the bottom as that's the case with the lowest weight. - // FIXME: Take branch probability into account. + // Rearrange the case blocks so that the last one falls through if possible + // without without changing the order of weights. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; + if (I->Weight > W.LastCluster->Weight) + break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); break; @@ -7676,8 +7881,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Compute total weight. uint32_t UnhandledWeights = 0; - for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { UnhandledWeights += I->Weight; + assert(UnhandledWeights >= I->Weight && "Weight overflow!"); + } MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { @@ -7779,14 +7986,41 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; assert(NumClusters >= 2 && "Too small to split!"); - // FIXME: When we have profile info, we might want to balance the tree based - // on weights instead of node count. + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. + while (LastLeft + 1 < FirstRight) { + // Zero-weight nodes would cause skewed trees since they don't affect + // LeftWeight or RightWeight. + assert(LastLeft->Weight != 0); + assert(FirstRight->Weight != 0); + + if (LeftWeight < RightWeight) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + } + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); - CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2; CaseClusterIt FirstLeft = W.FirstCluster; - CaseClusterIt LastLeft = PivotCluster - 1; - CaseClusterIt FirstRight = PivotCluster; CaseClusterIt LastRight = W.LastCluster; + const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. @@ -7825,7 +8059,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } // Create the CaseBlock record that will be used to lower the branch. - CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB); + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -7841,9 +8076,11 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = 0; // FIXME: Use 1 instead? - if (BPI) + uint32_t Weight = 1; + if (BPI) { Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()); + assert(Weight <= UINT32_MAX / SI.getNumSuccessors()); + } Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); }