#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
}
}
-namespace {
- /// RegsForValue - This struct represents the registers (physical or virtual)
- /// that a particular set of values is assigned, and the type information
- /// about the value. The most common situation is to represent one value at a
- /// time, but struct or array values are handled element-wise as multiple
- /// values. The splitting of aggregates is performed recursively, so that we
- /// never have aggregate-typed registers. The values at this point do not
- /// necessarily have legal types, so each value may require one or more
- /// registers of some legal type.
- ///
- struct RegsForValue {
- /// ValueVTs - The value types of the values, which may not be legal, and
- /// may need be promoted or synthesized from one or more registers.
- ///
- SmallVector<EVT, 4> ValueVTs;
+RegsForValue::RegsForValue() {}
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
- ///
- /// With virtual registers, the contents of RegVTs is redundant with TLI's
- /// getRegisterType member function, however when with physical registers
- /// it is necessary to have a separate record of the types.
- ///
- SmallVector<MVT, 4> RegVTs;
-
- /// Regs - This list holds the registers assigned to the values.
- /// Each legal or promoted value requires one register, and each
- /// expanded value requires multiple registers.
- ///
- SmallVector<unsigned, 4> Regs;
-
- RegsForValue() {}
-
- RegsForValue(const SmallVector<unsigned, 4> ®s,
- MVT regvt, EVT valuevt)
- : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-
- RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, Type *Ty) {
- ComputeValueVTs(tli, Ty, ValueVTs);
-
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
- for (unsigned i = 0; i != NumRegs; ++i)
- Regs.push_back(Reg + i);
- RegVTs.push_back(RegisterVT);
- Reg += NumRegs;
- }
- }
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt,
+ EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- /// append - Add the specified values to this one.
- void append(const RegsForValue &RHS) {
- ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
- RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
- Regs.append(RHS.Regs.begin(), RHS.Regs.end());
- }
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
- SDLoc dl,
- SDValue &Chain, SDValue *Flag,
- const Value *V = nullptr) const;
-
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
- /// specified value into the registers specified by this object. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- void
- getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
- SDValue *Flag, const Value *V,
- ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
-
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind,
- bool HasMatching, unsigned MatchingIdx, SDLoc dl,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
- };
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
}
/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
const DbgValueInst *DI = DDI.getDI();
DebugLoc dl = DDI.getdl();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- MDLocalVariable *Variable = DI->getVariable();
- MDExpression *Expr = DI->getExpression();
+ DILocalVariable *Variable = DI->getVariable();
+ DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
uint64_t Offset = DI->getOffset();
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
- SDValue res;
+ SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
Ty);
SDValue Chain = DAG.getEntryNode();
- res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
- resolveDanglingDebugInfo(V, res);
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ resolveDanglingDebugInfo(V, Result);
}
- return res;
+ return Result;
}
/// getValue - Return an SDValue for the given Value.
Value *BasePtr = Ptr;
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
- Value *MemOpBasePtr = UniformBase ? BasePtr : NULL;
+ Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
MachineMemOperand::MOStore, VT.getStoreSize(),
MachineMemOperand *MMO =
DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : NULL),
- MachineMemOperand::MOLoad, VT.getStoreSize(),
- Alignment, AAInfo, Ranges);
+ getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
if (!UniformBase) {
Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy());
Index = getValue(Ptr);
}
-
SDValue Ops[] = { Root, Src0, Mask, Base, Index };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO);
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
- const Value *V, MDLocalVariable *Variable, MDExpression *Expr,
- MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
+ const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+ DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- MDLocalVariable *Variable = DI.getVariable();
- MDExpression *Expression = DI.getExpression();
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
const Value *Address = DI.getAddress();
assert(Variable && "Missing variable");
if (!Address) {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
- MDLocalVariable *Variable = DI.getVariable();
- MDExpression *Expression = DI.getExpression();
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
case Intrinsic::masked_gather:
visitMaskedGather(I);
+ return nullptr;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return nullptr;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
+ return nullptr;
case Intrinsic::masked_store:
visitMaskedStore(I);
return nullptr;
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- bool UseVoidTy,
+ Type *ReturnTy,
MachineBasicBlock *LandingPad,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.push_back(Entry);
}
- Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
+ .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
return lowerInvokable(CLI, LandingPad);
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ Type *ReturnTy =
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
std::pair<SDValue, SDValue> Result =
- lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
+ lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
LandingPad, true);
SDNode *CallEnd = Result.second.getNode();
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
- unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
- assert(NumClusters >= 2 && "Too small to split!");
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ // Balance the tree based on branch weights to create a near-optimal (in terms
+ // of search time given key frequency) binary search tree. See e.g. Kurt
+ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
+ CaseClusterIt LastLeft = W.FirstCluster;
+ CaseClusterIt FirstRight = W.LastCluster;
+ uint32_t LeftWeight = LastLeft->Weight;
+ uint32_t RightWeight = FirstRight->Weight;
+
+ // Move LastLeft and FirstRight towards each other from opposite directions to
+ // find a partitioning of the clusters which balances the weight on both
+ // sides. If LeftWeight and RightWeight are equal, alternate which side is
+ // taken to ensure 0-weight nodes are distributed evenly.
+ unsigned I = 0;
+ while (LastLeft + 1 < FirstRight) {
+ if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
+ LeftWeight += (++LastLeft)->Weight;
+ else
+ RightWeight += (--FirstRight)->Weight;
+ I++;
+ }
+ assert(LastLeft + 1 == FirstRight);
+ assert(LastLeft >= W.FirstCluster);
+ assert(FirstRight <= W.LastCluster);
- // FIXME: When we have profile info, we might want to balance the tree based
- // on weights instead of node count.
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
- CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2;
CaseClusterIt FirstLeft = W.FirstCluster;
- CaseClusterIt LastLeft = PivotCluster - 1;
- CaseClusterIt FirstRight = PivotCluster;
CaseClusterIt LastRight = W.LastCluster;
+
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
}
// Create the CaseBlock record that will be used to lower the branch.
- CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB);
+ CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+ LeftWeight, RightWeight);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
- uint32_t Weight = 0; // FIXME: Use 1 instead?
- if (BPI) {
- Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex());
- assert(Weight <= UINT32_MAX / SI.getNumSuccessors());
- }
+ uint32_t Weight =
+ BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0;
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
- if (TM.getOptLevel() != CodeGenOpt::None) {
- // Cluster adjacent cases with the same destination.
- sortAndRangeify(Clusters);
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
+ if (TM.getOptLevel() != CodeGenOpt::None) {
// Replace an unreachable default with the most popular destination.
// FIXME: Exploit unreachable default more aggressively.
bool UnreachableDefault =