#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/DenseMap.h"
STATISTIC(LoadsClustered, "Number of loads clustered together");
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf) {
-}
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
ScheduleDAG::Run(bb, insertPos);
}
+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
}
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
Cost = RC->getCopyCost();
}
}
static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- VTs.push_back(N->getValueType(i));
+ SDNode *FlagDestNode = Flag.getNode();
+
+ // Don't add a flag from a node to itself.
+ if (FlagDestNode == N) return;
+
+ // Don't add a flag to something which already has a flag.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
if (AddFlag)
VTs.push_back(MVT::Flag);
+
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
- if (Flag.getNode())
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (FlagDestNode)
Ops.push_back(Flag);
+
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
}
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// offsets are not far apart (target specific), it add MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
-void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
SmallVector<int64_t, 4> Offsets;
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
- SDNode *Node = &*NI;
- if (!Node || !Node->isMachineOpcode())
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
continue;
-
- unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- if (!TID.mayLoad())
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if they addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
- SDNode *Chain = 0;
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
- Chain = Node->getOperand(NumOps-1).getNode();
- if (!Chain)
- continue;
+ if (!Cluster)
+ return;
- // Look for other loads of the same chain. Find loads that are loading from
- // the same base pointer and different offsets.
- Visited.clear();
- Offsets.clear();
- O2SMap.clear();
- bool Cluster = false;
- SDNode *Base = Node;
- int64_t BaseOffset;
- for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
- SDNode *User = *I;
- if (User == Node || !Visited.insert(User))
- continue;
- int64_t Offset1, Offset2;
- if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
- // FIXME: Should be ok if they addresses are identical. But earlier
- // optimizations really should have eliminated one of the loads.
- continue;
- if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
- Offsets.push_back(Offset1);
- O2SMap.insert(std::make_pair(Offset2, User));
- Offsets.push_back(Offset2);
- if (Offset2 < Offset1) {
- Base = User;
- BaseOffset = Offset2;
- } else {
- BaseOffset = Offset1;
- }
- Cluster = true;
- }
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
- if (!Cluster)
- continue;
+ if (NumLoads == 0)
+ return;
- // Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end());
-
- // Check if the loads are close enough.
- SmallVector<SDNode*, 4> Loads;
- unsigned NumLoads = 0;
- int64_t BaseOff = Offsets[0];
- SDNode *BaseLoad = O2SMap[BaseOff];
- Loads.push_back(BaseLoad);
- for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
- int64_t Offset = Offsets[i];
- SDNode *Load = O2SMap[Offset];
- if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
- NumLoads))
- break; // Stop right here. Ignore loads that are further away.
- Loads.push_back(Load);
- ++NumLoads;
- }
+ // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // ensure they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddFlags(Lead, SDValue(0, 0), true, DAG);
- if (NumLoads == 0)
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutFlag = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddFlags(Load, InFlag, OutFlag, DAG);
+
+ if (OutFlag)
+ InFlag = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
continue;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
- // ensure they are scheduled in order of increasing addresses.
- SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0,0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
- for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
- bool OutFlag = i < e-1;
- SDNode *Load = Loads[i];
- AddFlags(Load, InFlag, OutFlag, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues()-1);
- ++LoadsClustered;
- }
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ if (TID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
}
}
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
N->setNodeId(NodeSUnit->NodeNum);
// Assign the Latency field of NodeSUnit using target-provided information.
- if (UnitLatencies)
- NodeSUnit->Latency = 1;
- else
- ComputeLatency(NodeSUnit);
+ ComputeLatency(NodeSUnit);
}
}
if (Cost >= 0)
PhysReg = 0;
- const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpSU->Latency, PhysReg);
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, (SDep &)dep);
- ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
SU->addPred(dep);
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
- // Cluster loads from "near" addresses into combined SUnits.
- ClusterNeighboringLoads();
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
// Populate the SUnits array.
BuildSchedUnits();
// Compute all the scheduling dependencies between nodes.
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
+ return;
+ }
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
SU->Latency = 0;
for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
if (N->isMachineOpcode()) {
- SU->Latency += InstrItins.
+ SU->Latency += InstrItins->
getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
}
}
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
}
// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule use to determine how to insert dbg_value
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM,
DenseMap<SDValue, unsigned> &VRBaseMap,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
SmallSet<unsigned, 8> &Seen) {
return;
MachineBasicBlock *BB = Emitter.getBlock();
- if (BB->empty() || BB->back().isPHI()) {
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
}
- Orders.push_back(std::make_pair(Order, &BB->back()));
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
if (!N->getHasDebugValue())
return;
// Opportunistically insert immediate dbg_value uses, i.e. those with source
continue;
unsigned DVOrder = DVs[i]->getOrder();
if (DVOrder == ++Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM);
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
- BB->insert(InsertPos, DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
DVs[i]->setIsInvalidated();
}
}
/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::
-EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
SUnit *SU = Sequence[i];
if (!SU) {
while (!FlaggedNodes.empty()) {
SDNode *N = FlaggedNodes.back();
Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen);
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
FlaggedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders,
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
Seen);
}
- // Insert all the dbg_value which have not already been inserted in source
+ // Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
- MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
- while (BBBegin != BB->end() && BBBegin->isPHI())
- ++BBBegin;
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
// values.
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
// Now emit the rest according to source order.
unsigned LastOrder = 0;
- MachineInstr *LastMI = 0;
for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
- MachineBasicBlock *MIBB = MI->getParent();
+#ifndef NDEBUG
+ unsigned LastDIOrder = 0;
+#endif
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+#ifndef NDEBUG
+ assert((*DI)->getOrder() >= LastDIOrder &&
+ "SDDbgValue nodes must be in source order!");
+ LastDIOrder = (*DI)->getOrder();
+#endif
if ((*DI)->isInvalidated())
continue;
- MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM);
- if (!LastOrder)
- // Insert to start of the BB (after PHIs).
- BB->insert(BBBegin, DbgMI);
- else {
- MachineBasicBlock::iterator Pos = MI;
- MIBB->insert(llvm::next(Pos), DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
+ }
}
}
LastOrder = Order;
- LastMI = MI;
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM);
- InsertBB->insert(Pos, DbgMI);
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI)
+ InsertBB->insert(Pos, DbgMI);
}
++DI;
}