lib/CodeGen/ScheduleDAGInstrs.cpp

   1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This implements the ScheduleDAGInstrs class, which implements re-scheduling
  11 // of MachineInstrs.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "sched-instrs"
  16 #include "ScheduleDAGInstrs.h"
  17 #include "llvm/Analysis/AliasAnalysis.h"
  18 #include "llvm/CodeGen/MachineFunctionPass.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/CodeGen/PseudoSourceValue.h"
  21 #include "llvm/Target/TargetMachine.h"
  22 #include "llvm/Target/TargetInstrInfo.h"
  23 #include "llvm/Target/TargetRegisterInfo.h"
  24 #include "llvm/Target/TargetSubtarget.h"
  25 #include "llvm/Support/Debug.h"
  26 #include "llvm/Support/raw_ostream.h"
  27 #include "llvm/ADT/SmallSet.h"
  28 using namespace llvm;
  29
  30 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
  31                                      const MachineLoopInfo &mli,
  32                                      const MachineDominatorTree &mdt)
  33   : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {}
  34
  35 /// getOpcode - If this is an Instruction or a ConstantExpr, return the
  36 /// opcode value. Otherwise return UserOp1.
  37 static unsigned getOpcode(const Value *V) {
  38   if (const Instruction *I = dyn_cast<Instruction>(V))
  39     return I->getOpcode();
  40   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  41     return CE->getOpcode();
  42   // Use UserOp1 to mean there's no opcode.
  43   return Instruction::UserOp1;
  44 }
  45
  46 /// getUnderlyingObjectFromInt - This is the function that does the work of
  47 /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
  48 static const Value *getUnderlyingObjectFromInt(const Value *V) {
  49   do {
  50     if (const User *U = dyn_cast<User>(V)) {
  51       // If we find a ptrtoint, we can transfer control back to the
  52       // regular getUnderlyingObjectFromInt.
  53       if (getOpcode(U) == Instruction::PtrToInt)
  54         return U->getOperand(0);
  55       // If we find an add of a constant or a multiplied value, it's
  56       // likely that the other operand will lead us to the base
  57       // object. We don't have to worry about the case where the
  58       // object address is somehow being computed bt the multiply,
  59       // because our callers only care when the result is an
  60       // identifibale object.
  61       if (getOpcode(U) != Instruction::Add ||
  62           (!isa<ConstantInt>(U->getOperand(1)) &&
  63            getOpcode(U->getOperand(1)) != Instruction::Mul))
  64         return V;
  65       V = U->getOperand(0);
  66     } else {
  67       return V;
  68     }
  69     assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
  70   } while (1);
  71 }
  72
  73 /// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
  74 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
  75 static const Value *getUnderlyingObject(const Value *V) {
  76   // First just call Value::getUnderlyingObject to let it do what it does.
  77   do {
  78     V = V->getUnderlyingObject();
  79     // If it found an inttoptr, use special code to continue climing.
  80     if (getOpcode(V) != Instruction::IntToPtr)
  81       break;
  82     const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
  83     // If that succeeded in finding a pointer, continue the search.
  84     if (!isa<PointerType>(O->getType()))
  85       break;
  86     V = O;
  87   } while (1);
  88   return V;
  89 }
  90
  91 /// getUnderlyingObjectForInstr - If this machine instr has memory reference
  92 /// information and it can be tracked to a normal reference to a known
  93 /// object, return the Value for that object. Otherwise return null.
  94 static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) {
  95   if (!MI->hasOneMemOperand() ||
  96       !MI->memoperands_begin()->getValue() ||
  97       MI->memoperands_begin()->isVolatile())
  98     return 0;
  99
 100   const Value *V = MI->memoperands_begin()->getValue();
 101   if (!V)
 102     return 0;
 103
 104   V = getUnderlyingObject(V);
 105   if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V))
 106     return 0;
 107
 108   return V;
 109 }
 110
 111 void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
 112   if (MachineLoop *ML = MLI.getLoopFor(BB))
 113     if (BB == ML->getLoopLatch()) {
 114       MachineBasicBlock *Header = ML->getHeader();
 115       for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
 116            E = Header->livein_end(); I != E; ++I)
 117         LoopLiveInRegs.insert(*I);
 118       LoopRegs.VisitLoop(ML);
 119     }
 120 }
 121
 122 void ScheduleDAGInstrs::BuildSchedGraph() {
 123   // We'll be allocating one SUnit for each instruction, plus one for
 124   // the region exit node.
 125   SUnits.reserve(BB->size());
 126
 127   // We build scheduling units by walking a block's instruction list from bottom
 128   // to top.
 129
 130   // Remember where a generic side-effecting instruction is as we procede. If
 131   // ChainMMO is null, this is assumed to have arbitrary side-effects. If
 132   // ChainMMO is non-null, then Chain makes only a single memory reference.
 133   SUnit *Chain = 0;
 134   MachineMemOperand *ChainMMO = 0;
 135
 136   // Memory references to specific known memory locations are tracked so that
 137   // they can be given more precise dependencies.
 138   std::map<const Value *, SUnit *> MemDefs;
 139   std::map<const Value *, std::vector<SUnit *> > MemUses;
 140
 141   // Check to see if the scheduler cares about latencies.
 142   bool UnitLatencies = ForceUnitLatencies();
 143
 144   // Ask the target if address-backscheduling is desirable, and if so how much.
 145   unsigned SpecialAddressLatency =
 146     TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
 147
 148   // Walk the list of instructions, from bottom moving up.
 149   for (MachineBasicBlock::iterator MII = End, MIE = Begin;
 150        MII != MIE; --MII) {
 151     MachineInstr *MI = prior(MII);
 152     const TargetInstrDesc &TID = MI->getDesc();
 153     assert(!TID.isTerminator() && !MI->isLabel() &&
 154            "Cannot schedule terminators or labels!");
 155     // Create the SUnit for this MI.
 156     SUnit *SU = NewSUnit(MI);
 157
 158     // Assign the Latency field of SU using target-provided information.
 159     if (UnitLatencies)
 160       SU->Latency = 1;
 161     else
 162       ComputeLatency(SU);
 163
 164     // Add register-based dependencies (data, anti, and output).
 165     for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
 166       const MachineOperand &MO = MI->getOperand(j);
 167       if (!MO.isReg()) continue;
 168       unsigned Reg = MO.getReg();
 169       if (Reg == 0) continue;
 170
 171       assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
 172       std::vector<SUnit *> &UseList = Uses[Reg];
 173       std::vector<SUnit *> &DefList = Defs[Reg];
 174       // Optionally add output and anti dependencies.
 175       // TODO: Using a latency of 1 here assumes there's no cost for
 176       //       reusing registers.
 177       SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
 178       for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
 179         SUnit *DefSU = DefList[i];
 180         if (DefSU != SU &&
 181             (Kind != SDep::Output || !MO.isDead() ||
 182              !DefSU->getInstr()->registerDefIsDead(Reg)))
 183           DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
 184       }
 185       for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
 186         std::vector<SUnit *> &DefList = Defs[*Alias];
 187         for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
 188           SUnit *DefSU = DefList[i];
 189           if (DefSU != SU &&
 190               (Kind != SDep::Output || !MO.isDead() ||
 191                !DefSU->getInstr()->registerDefIsDead(Reg)))
 192             DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
 193         }
 194       }
 195
 196       if (MO.isDef()) {
 197         // Add any data dependencies.
 198         unsigned DataLatency = SU->Latency;
 199         for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
 200           SUnit *UseSU = UseList[i];
 201           if (UseSU != SU) {
 202             unsigned LDataLatency = DataLatency;
 203             // Optionally add in a special extra latency for nodes that
 204             // feed addresses.
 205             // TODO: Do this for register aliases too.
 206             if (SpecialAddressLatency != 0 && !UnitLatencies) {
 207               MachineInstr *UseMI = UseSU->getInstr();
 208               const TargetInstrDesc &UseTID = UseMI->getDesc();
 209               int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
 210               assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
 211               if ((UseTID.mayLoad() || UseTID.mayStore()) &&
 212                   (unsigned)RegUseIndex < UseTID.getNumOperands() &&
 213                   UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
 214                 LDataLatency += SpecialAddressLatency;
 215             }
 216             UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
 217           }
 218         }
 219         for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
 220           std::vector<SUnit *> &UseList = Uses[*Alias];
 221           for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
 222             SUnit *UseSU = UseList[i];
 223             if (UseSU != SU)
 224               UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
 225           }
 226         }
 227
 228         // If a def is going to wrap back around to the top of the loop,
 229         // backschedule it.
 230         if (!UnitLatencies && DefList.empty()) {
 231           LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
 232           if (I != LoopRegs.Deps.end()) {
 233             const MachineOperand *UseMO = I->second.first;
 234             unsigned Count = I->second.second;
 235             const MachineInstr *UseMI = UseMO->getParent();
 236             unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
 237             const TargetInstrDesc &UseTID = UseMI->getDesc();
 238             // TODO: If we knew the total depth of the region here, we could
 239             // handle the case where the whole loop is inside the region but
 240             // is large enough that the isScheduleHigh trick isn't needed.
 241             if (UseMOIdx < UseTID.getNumOperands()) {
 242               // Currently, we only support scheduling regions consisting of
 243               // single basic blocks. Check to see if the instruction is in
 244               // the same region by checking to see if it has the same parent.
 245               if (UseMI->getParent() != MI->getParent()) {
 246                 unsigned Latency = SU->Latency;
 247                 if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
 248                   Latency += SpecialAddressLatency;
 249                 // This is a wild guess as to the portion of the latency which
 250                 // will be overlapped by work done outside the current
 251                 // scheduling region.
 252                 Latency -= std::min(Latency, Count);
 253                 // Add the artifical edge.
 254                 ExitSU.addPred(SDep(SU, SDep::Order, Latency,
 255                                     /*Reg=*/0, /*isNormalMemory=*/false,
 256                                     /*isMustAlias=*/false,
 257                                     /*isArtificial=*/true));
 258               } else if (SpecialAddressLatency > 0 &&
 259                          UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
 260                 // The entire loop body is within the current scheduling region
 261                 // and the latency of this operation is assumed to be greater
 262                 // than the latency of the loop.
 263                 // TODO: Recursively mark data-edge predecessors as
 264                 //       isScheduleHigh too.
 265                 SU->isScheduleHigh = true;
 266               }
 267             }
 268             LoopRegs.Deps.erase(I);
 269           }
 270         }
 271
 272         UseList.clear();
 273         if (!MO.isDead())
 274           DefList.clear();
 275         DefList.push_back(SU);
 276       } else {
 277         UseList.push_back(SU);
 278       }
 279     }
 280
 281     // Add chain dependencies.
 282     // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
 283     // after stack slots are lowered to actual addresses.
 284     // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
 285     // produce more precise dependence information.
 286     if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
 287     new_chain:
 288       // This is the conservative case. Add dependencies on all memory
 289       // references.
 290       if (Chain)
 291         Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
 292       Chain = SU;
 293       for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
 294         PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
 295       PendingLoads.clear();
 296       for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
 297            E = MemDefs.end(); I != E; ++I) {
 298         I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
 299         I->second = SU;
 300       }
 301       for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
 302            MemUses.begin(), E = MemUses.end(); I != E; ++I) {
 303         for (unsigned i = 0, e = I->second.size(); i != e; ++i)
 304           I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
 305         I->second.clear();
 306       }
 307       // See if it is known to just have a single memory reference.
 308       MachineInstr *ChainMI = Chain->getInstr();
 309       const TargetInstrDesc &ChainTID = ChainMI->getDesc();
 310       if (!ChainTID.isCall() &&
 311           !ChainTID.hasUnmodeledSideEffects() &&
 312           ChainMI->hasOneMemOperand() &&
 313           !ChainMI->memoperands_begin()->isVolatile() &&
 314           ChainMI->memoperands_begin()->getValue())
 315         // We know that the Chain accesses one specific memory location.
 316         ChainMMO = &*ChainMI->memoperands_begin();
 317       else
 318         // Unknown memory accesses. Assume the worst.
 319         ChainMMO = 0;
 320     } else if (TID.mayStore()) {
 321       if (const Value *V = getUnderlyingObjectForInstr(MI)) {
 322         // A store to a specific PseudoSourceValue. Add precise dependencies.
 323         // Handle the def in MemDefs, if there is one.
 324         std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
 325         if (I != MemDefs.end()) {
 326           I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
 327                                   /*isNormalMemory=*/true));
 328           I->second = SU;
 329         } else {
 330           MemDefs[V] = SU;
 331         }
 332         // Handle the uses in MemUses, if there are any.
 333         std::map<const Value *, std::vector<SUnit *> >::iterator J =
 334           MemUses.find(V);
 335         if (J != MemUses.end()) {
 336           for (unsigned i = 0, e = J->second.size(); i != e; ++i)
 337             J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
 338                                        /*isNormalMemory=*/true));
 339           J->second.clear();
 340         }
 341         // Add dependencies from all the PendingLoads, since without
 342         // memoperands we must assume they alias anything.
 343         for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
 344           PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
 345         // Add a general dependence too, if needed.
 346         if (Chain)
 347           Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
 348       } else
 349         // Treat all other stores conservatively.
 350         goto new_chain;
 351     } else if (TID.mayLoad()) {
 352       if (TII->isInvariantLoad(MI)) {
 353         // Invariant load, no chain dependencies needed!
 354       } else if (const Value *V = getUnderlyingObjectForInstr(MI)) {
 355         // A load from a specific PseudoSourceValue. Add precise dependencies.
 356         std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
 357         if (I != MemDefs.end())
 358           I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
 359                                   /*isNormalMemory=*/true));
 360         MemUses[V].push_back(SU);
 361
 362         // Add a general dependence too, if needed.
 363         if (Chain && (!ChainMMO ||
 364                       (ChainMMO->isStore() || ChainMMO->isVolatile())))
 365           Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
 366       } else if (MI->hasVolatileMemoryRef()) {
 367         // Treat volatile loads conservatively. Note that this includes
 368         // cases where memoperand information is unavailable.
 369         goto new_chain;
 370       } else {
 371         // A normal load. Depend on the general chain, as well as on
 372         // all stores. In the absense of MachineMemOperand information,
 373         // we can't even assume that the load doesn't alias well-behaved
 374         // memory locations.
 375         if (Chain)
 376           Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
 377         for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
 378              E = MemDefs.end(); I != E; ++I)
 379           I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
 380         PendingLoads.push_back(SU);
 381       }
 382     }
 383   }
 384
 385   for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
 386     Defs[i].clear();
 387     Uses[i].clear();
 388   }
 389   PendingLoads.clear();
 390 }
 391
 392 void ScheduleDAGInstrs::FinishBlock() {
 393   // Nothing to do.
 394 }
 395
 396 void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
 397   const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
 398
 399   // Compute the latency for the node.  We use the sum of the latencies for
 400   // all nodes flagged together into this SUnit.
 401   SU->Latency =
 402     InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
 403
 404   // Simplistic target-independent heuristic: assume that loads take
 405   // extra time.
 406   if (InstrItins.isEmpty())
 407     if (SU->getInstr()->getDesc().mayLoad())
 408       SU->Latency += 2;
 409 }
 410
 411 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
 412   SU->getInstr()->dump();
 413 }
 414
 415 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
 416   std::string s;
 417   raw_string_ostream oss(s);
 418   if (SU == &EntrySU)
 419     oss << "<entry>";
 420   else if (SU == &ExitSU)
 421     oss << "<exit>";
 422   else
 423     SU->getInstr()->print(oss);
 424   return oss.str();
 425 }
 426
 427 // EmitSchedule - Emit the machine code in scheduled order.
 428 MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
 429   // For MachineInstr-based scheduling, we're rescheduling the instructions in
 430   // the block, so start by removing them from the block.
 431   while (Begin != End) {
 432     MachineBasicBlock::iterator I = Begin;
 433     ++Begin;
 434     BB->remove(I);
 435   }
 436
 437   // Then re-insert them according to the given schedule.
 438   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
 439     SUnit *SU = Sequence[i];
 440     if (!SU) {
 441       // Null SUnit* is a noop.
 442       EmitNoop();
 443       continue;
 444     }
 445
 446     BB->insert(End, SU->getInstr());
 447   }
 448
 449   // Update the Begin iterator, as the first instruction in the block
 450   // may have been scheduled later.
 451   if (!Sequence.empty())
 452     Begin = Sequence[0]->getInstr();
 453
 454   return BB;
 455 }