//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
    IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
    LoopRegs(MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");

  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  SchedModel.init(*ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
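///
/// Illustrative example (not from the original source) of a sequence this
/// looks through:
///   %i = ptrtoint i8* %obj to i64
///   %a = add i64 %i, 16
///   %p = inttoptr i64 %a to i8*
/// Starting from %a, the walk steps through the add back to %i, and the
/// ptrtoint transfers control back to the pointer-based walk at %obj.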
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}
/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}
void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = 0;
}
/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}
/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}
/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = forceUnitLatencies();

  ScheduleDAG::clearDAG();
}
/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch, or when liveout registers
/// are used by instructions in the fallthrough block.
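///
/// For example (illustrative, not from the original source): if the region
/// ends in a conditional branch and a register defined in the region is
/// live into the fall-through successor, the defining instruction gets an
/// edge to ExitSU so the scheduler cannot sink it past the end of the block.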
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    assert(Uses.empty() && "Uses in set before adding deps?");
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      }
  }
}
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;
      MachineInstr *UseMI = UseSU->getInstr();
      int UseOp = UseList[i].OpIdx;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        MachineInstr *RegUse = UseOp < 0 ? 0 : UseMI;
        dep.setLatency(
          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                           RegUse, UseOp, /*FindMin=*/false));
        dep.setMinLatency(
          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                           RegUse, UseOp, /*FindMin=*/true));

        ST.adjustSchedDependency(SU, UseSU, dep);
      }
      UseSU->addPred(dep);
    }
  }
}
/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  // there's no cost for reusing registers.
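  //
  // Illustrative example (not from the original source): given
  //   %r1 = ADD ...   ; def
  //   ...  = USE %r1  ; use
  //   %r1 = SUB ...   ; redef
  // the use gets an anti edge (latency 0) to the redef, so both may issue
  // in the same cycle, while the two defs are ordered by an output edge.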
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i].SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (MO.isUse()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
  }
  else {
    addPhysRegDataDeps(SU, OperIdx);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            // isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // clear this register's use list
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    while (!DefList.empty() && DefList.back().SU->isCall)
      DefList.pop_back();

    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(PhysRegSUOper(SU, OperIdx));
  }
}
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Singly defined vregs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  // Check here if there are any others...
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies requires LiveIntervals");
  LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
  VNInfo *VNI = LRQ.valueIn();

  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  assert(VNI && "No value to read by operand");
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information, then
        // allow the target to perform its own adjustments.
        int DefOp = Def->findRegisterDefOperandIdx(Reg);
        dep.setLatency(
          SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
        dep.setMinLatency(
          SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));

        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
      (MI->hasOrderedMemoryRef() &&
       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
    return true;
  return false;
}
// This MI might have either incomplete info, or known to be unsafe
// to deal with (i.e. volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
                                        const MachineFrameInfo *MFI) {
  if (!MI || MI->memoperands_empty())
    return true;
  // We purposefully do not check for hasOneMemOperand() here
  // in hope to trigger an assert downstream in order to
  // finish implementation.
  if ((*MI->memoperands_begin())->isVolatile() ||
      MI->hasUnmodeledSideEffects())
    return true;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return true;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // Similarly to getUnderlyingObjectForInstr:
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return true;
  }
  // Does this pointer refer to a distinct and identifiable object?
  if (!isIdentifiedObject(V))
    return true;

  return false;
}
/// This returns true if the two MIs need a chain edge between them.
/// If these are not even memory operations, we still may need
/// chain deps between them. The question really is - could
/// these two MIs be reordered during scheduling from a memory dependency
/// point of view?
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             MachineInstr *MIa,
                             MachineInstr *MIb) {
  // Cover a trivial case - no edge is needed to itself.
  if (MIa == MIb)
    return false;

  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
    return true;

  // If we are dealing with two "normal" loads, we do not need an edge
  // between them - they could be reordered.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // To this point analysis is generic. From here on we do need AA.
  if (!AA)
    return true;

  MachineMemOperand *MMOa = *MIa->memoperands_begin();
  MachineMemOperand *MMOb = *MIb->memoperands_begin();

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
    llvm_unreachable("Multiple memory operands.");

  // The following interface to AA is fashioned after DAGCombiner::isAlias
  // and operates with MachineMemOperand offset with some important
  // assumptions:
  //   - LLVM fundamentally assumes flat address spaces.
  //   - MachineOperand offset can *only* result from legalization and
  //     cannot affect queries other than the trivial case of overlap
  //     checking.
  //   - These offsets never wrap and never step outside
  //     of allocated objects.
  //   - There should never be any negative offsets here.
  //
  // FIXME: Modify API to hide this math from "user"
  // FIXME: Even before we go to AA we can reason locally about some
  // memory objects. It can save compile time, and possibly catch some
  // corner cases not currently covered.

  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");

  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
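  // Worked example (illustrative, not from the original source): for a
  // 4-byte access at offset 8 (MMOa) and a 4-byte access at offset 4 (MMOb),
  // MinOffset = 4, Overlapa = 4 + 8 - 4 = 8, and Overlapb = 4 + 4 - 4 = 4,
  // so each AA location is sized to span from the smaller offset to the end
  // of its own access.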
  AliasAnalysis::AliasResult AAResult = AA->alias(
    AliasAnalysis::Location(MMOa->getValue(), Overlapa,
                            MMOa->getTBAAInfo()),
    AliasAnalysis::Location(MMOb->getValue(), Overlapb,
                            MMOb->getTBAAInfo()));

  return (AAResult != AliasAnalysis::NoAlias);
}
/// This recursive function iterates over chain deps of SUb looking for the
/// "latest" node that needs a chain edge to SUa.
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
                 SmallPtrSet<const SUnit*, 16> &Visited) {
  if (!SUa || !SUb || SUb == ExitSU)
    return *Depth;

  // Remember visited nodes.
  if (!Visited.insert(SUb))
    return *Depth;
  // If there is _some_ dependency already in place, do not
  // descend any further.
  // TODO: Need to make sure that if that dependency got eliminated or ignored
  // for any reason in the future, we would not violate DAG topology.
  // Currently it does not happen, but makes an implicit assumption about
  // future implementation.
  //
  // Independently, if we encounter a node that is some sort of global
  // object (like a call) we already have a full set of dependencies to it
  // and we can stop descending.
  if (SUa->isSucc(SUb) ||
      isGlobalMemoryObject(AA, SUb->getInstr()))
    return *Depth;

  // If we do need an edge, or we have exceeded depth budget,
  // add that edge to the predecessors chain of SUb,
  // and stop descending.
  if (*Depth > 200 ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
    SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                      /*isNormalMemory=*/true));
    return *Depth;
  }
  // Track current depth.
  (*Depth)++;
  // Iterate over chain dependencies only.
  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
       I != E; ++I)
    if (I->isCtrl())
      iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
  return *Depth;
}
/// This function assumes that "downward" from SU there exists a
/// tail/leaf of the already constructed DAG. It iterates downward and
/// checks whether SU can be aliasing any node dominated
/// by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                            SUnit *SU, SUnit *ExitSU,
                            std::set<SUnit *> &CheckList,
                            unsigned LatencyToLoad) {
  if (!SU)
    return;

  SmallPtrSet<const SUnit*, 16> Visited;
  unsigned Depth = 0;

  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
       I != IE; ++I) {
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
                         /*isNormalMemory=*/true));
    }
    // Now go through all the chain successors and iterate from them.
    // Keep track of visited nodes.
    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
           JE = (*I)->Succs.end(); J != JE; ++J)
      if (J->isCtrl())
        iterateChainSucc(AA, MFI, SU, J->getSUnit(),
                         ExitSU, &Depth, Visited);
  }
}
/// Check whether two objects need a chain edge; if so, add it,
/// otherwise remember the rejected SU.
static inline
void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                        SUnit *SUa, SUnit *SUb,
                        std::set<SUnit *> &RejectList,
                        unsigned TrueMemOrderLatency = 0,
                        bool isNormalMemory = false) {
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (!EnableAASchedMI ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
    SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
                      isNormalMemory));
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
    DEBUG(dbgs() << "\tReject chain dep between SU("
          << SUa->NodeNum << ") and SU("
          << SUb->NodeNum << ")\n");
  }
}
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
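///
/// For example (illustrative): a region with three non-debug instructions
/// I0, I1, I2 in block order produces SUnits[0..2] mapped to I0..I2, each
/// with NodeNum equal to its position in that order.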
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      computeLatency(SU);
  }
}
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
  std::set<SUnit*> RejectMemNodes;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }
    if (RPTracker) {
      RPTracker->recede();
      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
    }

    assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for the true dependency of a Store followed by
    // an aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    //
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
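    //
    // Illustrative example (not from the original source):
    //   ST [a], r1      ; store
    //   LD r2, [b]      ; load that may alias [a]
    // The store->load order edge gets latency 1 (TrueMemOrderLatency below),
    // modeling the assumed store-to-load bypass; other memory order edges,
    // e.g. store->store, get latency 0.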
    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
    if (isGlobalMemoryObject(AA, MI)) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;
      // This is a barrier event that acts as a pivotal node in the DAG,
      // so it is safe to clear list of exposed nodes.
      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      RejectMemNodes.clear();
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain) {
        unsigned ChainLatency = 0;
        if (AliasChain->getInstr()->mayLoad())
          ChainLatency = TrueMemOrderLatency;
        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
                           ChainLatency);
      }
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
                           TrueMemOrderLatency);
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I)
        addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
                             TrueMemOrderLatency);
      }
      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
                             0, true);
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
                               TrueMemOrderLatency, true);
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
                               TrueMemOrderLatency);
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
          // But we also should check dependent instructions for the
          // SU in question.
          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                          TrueMemOrderLatency);
        }
        // Add dependence on barrier chain, if needed.
        // There is no point to check aliasing on barrier event. Even if
        // SU and barrier _could_ be reordered, they should not. In addition,
        // we have lost all RejectMemNodes below barrier.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);

          PendingLoads.push_back(SU);
          MayAlias = true;
        }
        if (MayAlias)
          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node. We only provide a default for missing
  // itineraries. Empty itineraries still have latency properties.
  if (!InstrItins) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}
/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}