lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

   1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
   10 // This implements the ScheduleDAGSDNodes class, which is a base class used
   11 // by scheduling implementations that operate on SelectionDAG SDNodes.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "pre-RA-sched"
  16 #include "ScheduleDAGSDNodes.h"
  17 #include "InstrEmitter.h"
  18 #include "llvm/CodeGen/SelectionDAG.h"
  19 #include "llvm/Target/TargetMachine.h"
  20 #include "llvm/Target/TargetInstrInfo.h"
  21 #include "llvm/Target/TargetRegisterInfo.h"
  22 #include "llvm/Target/TargetSubtarget.h"
  23 #include "llvm/ADT/DenseMap.h"
  24 #include "llvm/ADT/SmallPtrSet.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/ADT/Statistic.h"
  27 #include "llvm/Support/Debug.h"
  28 #include "llvm/Support/raw_ostream.h"
  29 using namespace llvm;
  30
  31 STATISTIC(LoadsClustered, "Number of loads clustered together");
  32
  33 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
  34   : ScheduleDAG(mf) {
  35 }
  36
  37 /// Run - perform scheduling.
  38 ///
  39 void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
  40                              MachineBasicBlock::iterator insertPos) {
  41   DAG = dag;
  42   ScheduleDAG::Run(bb, insertPos);
  43 }
  44
  45 SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  46   SUnit *SU = NewSUnit(Old->getNode());
  47   SU->OrigNode = Old->OrigNode;
  48   SU->Latency = Old->Latency;
  49   SU->isTwoAddress = Old->isTwoAddress;
  50   SU->isCommutable = Old->isCommutable;
  51   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  52   SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  53   Old->isCloned = true;
  54   return SU;
  55 }
  56
  57 /// CheckForPhysRegDependency - Check if the dependency between def and use of
  58 /// a specified operand is a physical register dependency. If so, returns the
  59 /// register and the cost of copying the register.
  60 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
  61                                       const TargetRegisterInfo *TRI,
  62                                       const TargetInstrInfo *TII,
  63                                       unsigned &PhysReg, int &Cost) {
  64   if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
  65     return;
  66
  67   unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  68   if (TargetRegisterInfo::isVirtualRegister(Reg))
  69     return;
  70
  71   unsigned ResNo = User->getOperand(2).getResNo();
  72   if (Def->isMachineOpcode()) {
  73     const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
  74     if (ResNo >= II.getNumDefs() &&
  75         II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
  76       PhysReg = Reg;
  77       const TargetRegisterClass *RC =
  78         TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
  79       Cost = RC->getCopyCost();
  80     }
  81   }
  82 }
  83
  84 static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
  85                      SelectionDAG *DAG) {
  86   SmallVector<EVT, 4> VTs;
  87   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
  88     VTs.push_back(N->getValueType(i));
  89   if (AddFlag)
  90     VTs.push_back(MVT::Flag);
  91   SmallVector<SDValue, 4> Ops;
  92   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
  93     Ops.push_back(N->getOperand(i));
  94   if (Flag.getNode())
  95     Ops.push_back(Flag);
  96   SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
  97   DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
  98 }
  99
 100 /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
 101 /// This function finds loads of the same base and different offsets. If the
 102 /// offsets are not far apart (target specific), it add MVT::Flag inputs and
 103 /// outputs to ensure they are scheduled together and in order. This
 104 /// optimization may benefit some targets by improving cache locality.
 105 void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
 106   SmallPtrSet<SDNode*, 16> Visited;
 107   SmallVector<int64_t, 4> Offsets;
 108   DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
 109   for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
 110        E = DAG->allnodes_end(); NI != E; ++NI) {
 111     SDNode *Node = &*NI;
 112     if (!Node || !Node->isMachineOpcode())
 113       continue;
 114
 115     unsigned Opc = Node->getMachineOpcode();
 116     const TargetInstrDesc &TID = TII->get(Opc);
 117     if (!TID.mayLoad())
 118       continue;
 119
 120     SDNode *Chain = 0;
 121     unsigned NumOps = Node->getNumOperands();
 122     if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
 123       Chain = Node->getOperand(NumOps-1).getNode();
 124     if (!Chain)
 125       continue;
 126
 127     // Look for other loads of the same chain. Find loads that are loading from
 128     // the same base pointer and different offsets.
 129     Visited.clear();
 130     Offsets.clear();
 131     O2SMap.clear();
 132     bool Cluster = false;
 133     SDNode *Base = Node;
 134     int64_t BaseOffset;
 135     for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
 136          I != E; ++I) {
 137       SDNode *User = *I;
 138       if (User == Node || !Visited.insert(User))
 139         continue;
 140       int64_t Offset1, Offset2;
 141       if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
 142           Offset1 == Offset2)
 143         // FIXME: Should be ok if they addresses are identical. But earlier
 144         // optimizations really should have eliminated one of the loads.
 145         continue;
 146       if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
 147         Offsets.push_back(Offset1);
 148       O2SMap.insert(std::make_pair(Offset2, User));
 149       Offsets.push_back(Offset2);
 150       if (Offset2 < Offset1) {
 151         Base = User;
 152         BaseOffset = Offset2;
 153       } else {
 154         BaseOffset = Offset1;
 155       }
 156       Cluster = true;
 157     }
 158
 159     if (!Cluster)
 160       continue;
 161
 162     // Sort them in increasing order.
 163     std::sort(Offsets.begin(), Offsets.end());
 164
 165     // Check if the loads are close enough.
 166     SmallVector<SDNode*, 4> Loads;
 167     unsigned NumLoads = 0;
 168     int64_t BaseOff = Offsets[0];
 169     SDNode *BaseLoad = O2SMap[BaseOff];
 170     Loads.push_back(BaseLoad);
 171     for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
 172       int64_t Offset = Offsets[i];
 173       SDNode *Load = O2SMap[Offset];
 174       if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
 175                                         NumLoads))
 176         break; // Stop right here. Ignore loads that are further away.
 177       Loads.push_back(Load);
 178       ++NumLoads;
 179     }
 180
 181     if (NumLoads == 0)
 182       continue;
 183
 184     // Cluster loads by adding MVT::Flag outputs and inputs. This also
 185     // ensure they are scheduled in order of increasing addresses.
 186     SDNode *Lead = Loads[0];
 187     AddFlags(Lead, SDValue(0,0), true, DAG);
 188     SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
 189     for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
 190       bool OutFlag = i < e-1;
 191       SDNode *Load = Loads[i];
 192       AddFlags(Load, InFlag, OutFlag, DAG);
 193       if (OutFlag)
 194         InFlag = SDValue(Load, Load->getNumValues()-1);
 195       ++LoadsClustered;
 196     }
 197   }
 198 }
 199
 200 void ScheduleDAGSDNodes::BuildSchedUnits() {
 201   // During scheduling, the NodeId field of SDNode is used to map SDNodes
 202   // to their associated SUnits by holding SUnits table indices. A value
 203   // of -1 means the SDNode does not yet have an associated SUnit.
 204   unsigned NumNodes = 0;
 205   for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
 206        E = DAG->allnodes_end(); NI != E; ++NI) {
 207     NI->setNodeId(-1);
 208     ++NumNodes;
 209   }
 210
 211   // Reserve entries in the vector for each of the SUnits we are creating.  This
 212   // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
 213   // invalidated.
 214   // FIXME: Multiply by 2 because we may clone nodes during scheduling.
 215   // This is a temporary workaround.
 216   SUnits.reserve(NumNodes * 2);
 217
 218   // Check to see if the scheduler cares about latencies.
 219   bool UnitLatencies = ForceUnitLatencies();
 220
 221   for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
 222        E = DAG->allnodes_end(); NI != E; ++NI) {
 223     if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
 224       continue;
 225
 226     // If this node has already been processed, stop now.
 227     if (NI->getNodeId() != -1) continue;
 228
 229     SUnit *NodeSUnit = NewSUnit(NI);
 230
 231     // See if anything is flagged to this node, if so, add them to flagged
 232     // nodes.  Nodes can have at most one flag input and one flag output.  Flags
 233     // are required to be the last operand and result of a node.
 234
 235     // Scan up to find flagged preds.
 236     SDNode *N = NI;
 237     while (N->getNumOperands() &&
 238            N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
 239       N = N->getOperand(N->getNumOperands()-1).getNode();
 240       assert(N->getNodeId() == -1 && "Node already inserted!");
 241       N->setNodeId(NodeSUnit->NodeNum);
 242     }
 243
 244     // Scan down to find any flagged succs.
 245     N = NI;
 246     while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
 247       SDValue FlagVal(N, N->getNumValues()-1);
 248
 249       // There are either zero or one users of the Flag result.
 250       bool HasFlagUse = false;
 251       for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
 252            UI != E; ++UI)
 253         if (FlagVal.isOperandOf(*UI)) {
 254           HasFlagUse = true;
 255           assert(N->getNodeId() == -1 && "Node already inserted!");
 256           N->setNodeId(NodeSUnit->NodeNum);
 257           N = *UI;
 258           break;
 259         }
 260       if (!HasFlagUse) break;
 261     }
 262
 263     // If there are flag operands involved, N is now the bottom-most node
 264     // of the sequence of nodes that are flagged together.
 265     // Update the SUnit.
 266     NodeSUnit->setNode(N);
 267     assert(N->getNodeId() == -1 && "Node already inserted!");
 268     N->setNodeId(NodeSUnit->NodeNum);
 269
 270     // Assign the Latency field of NodeSUnit using target-provided information.
 271     if (UnitLatencies)
 272       NodeSUnit->Latency = 1;
 273     else
 274       ComputeLatency(NodeSUnit);
 275   }
 276 }
 277
 278 void ScheduleDAGSDNodes::AddSchedEdges() {
 279   const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
 280
 281   // Check to see if the scheduler cares about latencies.
 282   bool UnitLatencies = ForceUnitLatencies();
 283
 284   // Pass 2: add the preds, succs, etc.
 285   for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
 286     SUnit *SU = &SUnits[su];
 287     SDNode *MainNode = SU->getNode();
 288
 289     if (MainNode->isMachineOpcode()) {
 290       unsigned Opc = MainNode->getMachineOpcode();
 291       const TargetInstrDesc &TID = TII->get(Opc);
 292       for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
 293         if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
 294           SU->isTwoAddress = true;
 295           break;
 296         }
 297       }
 298       if (TID.isCommutable())
 299         SU->isCommutable = true;
 300     }
 301
 302     // Find all predecessors and successors of the group.
 303     for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
 304       if (N->isMachineOpcode() &&
 305           TII->get(N->getMachineOpcode()).getImplicitDefs()) {
 306         SU->hasPhysRegClobbers = true;
 307         unsigned NumUsed = InstrEmitter::CountResults(N);
 308         while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
 309           --NumUsed;    // Skip over unused values at the end.
 310         if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
 311           SU->hasPhysRegDefs = true;
 312       }
 313
 314       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 315         SDNode *OpN = N->getOperand(i).getNode();
 316         if (isPassiveNode(OpN)) continue;   // Not scheduled.
 317         SUnit *OpSU = &SUnits[OpN->getNodeId()];
 318         assert(OpSU && "Node has no SUnit!");
 319         if (OpSU == SU) continue;           // In the same group.
 320
 321         EVT OpVT = N->getOperand(i).getValueType();
 322         assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
 323         bool isChain = OpVT == MVT::Other;
 324
 325         unsigned PhysReg = 0;
 326         int Cost = 1;
 327         // Determine if this is a physical register dependency.
 328         CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
 329         assert((PhysReg == 0 || !isChain) &&
 330                "Chain dependence via physreg data?");
 331         // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
 332         // emits a copy from the physical register to a virtual register unless
 333         // it requires a cross class copy (cost < 0). That means we are only
 334         // treating "expensive to copy" register dependency as physical register
 335         // dependency. This may change in the future though.
 336         if (Cost >= 0)
 337           PhysReg = 0;
 338
 339         const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
 340                                OpSU->Latency, PhysReg);
 341         if (!isChain && !UnitLatencies) {
 342           ComputeOperandLatency(OpSU, SU, (SDep &)dep);
 343           ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
 344         }
 345
 346         SU->addPred(dep);
 347       }
 348     }
 349   }
 350 }
 351
 352 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
 353 /// are input.  This SUnit graph is similar to the SelectionDAG, but
 354 /// excludes nodes that aren't interesting to scheduling, and represents
 355 /// flagged together nodes with a single SUnit.
 356 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
 357   // Cluster loads from "near" addresses into combined SUnits.
 358   ClusterNeighboringLoads();
 359   // Populate the SUnits array.
 360   BuildSchedUnits();
 361   // Compute all the scheduling dependencies between nodes.
 362   AddSchedEdges();
 363 }
 364
 365 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
 366   const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
 367
 368   // Compute the latency for the node.  We use the sum of the latencies for
 369   // all nodes flagged together into this SUnit.
 370   SU->Latency = 0;
 371   for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
 372     if (N->isMachineOpcode()) {
 373       SU->Latency += InstrItins.
 374         getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
 375     }
 376 }
 377
 378 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
 379   if (!SU->getNode()) {
 380     dbgs() << "PHYS REG COPY\n";
 381     return;
 382   }
 383
 384   SU->getNode()->dump(DAG);
 385   dbgs() << "\n";
 386   SmallVector<SDNode *, 4> FlaggedNodes;
 387   for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
 388     FlaggedNodes.push_back(N);
 389   while (!FlaggedNodes.empty()) {
 390     dbgs() << "    ";
 391     FlaggedNodes.back()->dump(DAG);
 392     dbgs() << "\n";
 393     FlaggedNodes.pop_back();
 394   }
 395 }
 396
 397 /// EmitSchedule - Emit the machine code in scheduled order.
 398 MachineBasicBlock *ScheduleDAGSDNodes::
 399 EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
 400   InstrEmitter Emitter(BB, InsertPos);
 401   DenseMap<SDValue, unsigned> VRBaseMap;
 402   DenseMap<SUnit*, unsigned> CopyVRBaseMap;
 403   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
 404     SUnit *SU = Sequence[i];
 405     if (!SU) {
 406       // Null SUnit* is a noop.
 407       EmitNoop();
 408       continue;
 409     }
 410
 411     // For pre-regalloc scheduling, create instructions corresponding to the
 412     // SDNode and any flagged SDNodes and append them to the block.
 413     if (!SU->getNode()) {
 414       // Emit a copy.
 415       EmitPhysRegCopy(SU, CopyVRBaseMap);
 416       continue;
 417     }
 418
 419     SmallVector<SDNode *, 4> FlaggedNodes;
 420     for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
 421          N = N->getFlaggedNode())
 422       FlaggedNodes.push_back(N);
 423     while (!FlaggedNodes.empty()) {
 424       Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
 425                        VRBaseMap, EM);
 426       FlaggedNodes.pop_back();
 427     }
 428     Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
 429                      VRBaseMap, EM);
 430   }
 431
 432   BB = Emitter.getBlock();
 433   InsertPos = Emitter.getInsertPos();
 434   return BB;
 435 }