lib/CodeGen/ScheduleDAGInstrs.cpp

   1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This implements the ScheduleDAGInstrs class, which implements re-scheduling
  11 // of MachineInstrs.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "sched-instrs"
  16 #include "llvm/Operator.h"
  17 #include "llvm/Analysis/AliasAnalysis.h"
  18 #include "llvm/Analysis/ValueTracking.h"
  19 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  20 #include "llvm/CodeGen/MachineFunctionPass.h"
  21 #include "llvm/CodeGen/MachineMemOperand.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/PseudoSourceValue.h"
  24 #include "llvm/CodeGen/RegisterPressure.h"
  25 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  26 #include "llvm/MC/MCInstrItineraries.h"
  27 #include "llvm/Target/TargetMachine.h"
  28 #include "llvm/Target/TargetInstrInfo.h"
  29 #include "llvm/Target/TargetRegisterInfo.h"
  30 #include "llvm/Target/TargetSubtargetInfo.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/raw_ostream.h"
  34 #include "llvm/ADT/SmallSet.h"
  35 #include "llvm/ADT/SmallPtrSet.h"
  36 using namespace llvm;
  37
  38 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
  39     cl::ZeroOrMore, cl::init(false),
  40     cl::desc("Enable use of AA during MI GAD construction"));
  41
  42 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
  43                                      const MachineLoopInfo &mli,
  44                                      const MachineDominatorTree &mdt,
  45                                      bool IsPostRAFlag,
  46                                      LiveIntervals *lis)
  47   : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
  48     InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
  49     IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
  50   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  51   DbgValues.clear();
  52   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
  53          "Virtual registers must be removed prior to PostRA scheduling");
  54
  55   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  56   SchedModel.init(*ST.getSchedModel(), &ST, TII);
  57 }
  58
  59 /// getUnderlyingObjectFromInt - This is the function that does the work of
  60 /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
  61 static const Value *getUnderlyingObjectFromInt(const Value *V) {
  62   do {
  63     if (const Operator *U = dyn_cast<Operator>(V)) {
  64       // If we find a ptrtoint, we can transfer control back to the
  65       // regular getUnderlyingObjectFromInt.
  66       if (U->getOpcode() == Instruction::PtrToInt)
  67         return U->getOperand(0);
  68       // If we find an add of a constant or a multiplied value, it's
  69       // likely that the other operand will lead us to the base
  70       // object. We don't have to worry about the case where the
  71       // object address is somehow being computed by the multiply,
  72       // because our callers only care when the result is an
  73       // identifibale object.
  74       if (U->getOpcode() != Instruction::Add ||
  75           (!isa<ConstantInt>(U->getOperand(1)) &&
  76            Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
  77         return V;
  78       V = U->getOperand(0);
  79     } else {
  80       return V;
  81     }
  82     assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  83   } while (1);
  84 }
  85
  86 /// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
  87 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
  88 static const Value *getUnderlyingObject(const Value *V) {
  89   // First just call Value::getUnderlyingObject to let it do what it does.
  90   do {
  91     V = GetUnderlyingObject(V);
  92     // If it found an inttoptr, use special code to continue climing.
  93     if (Operator::getOpcode(V) != Instruction::IntToPtr)
  94       break;
  95     const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
  96     // If that succeeded in finding a pointer, continue the search.
  97     if (!O->getType()->isPointerTy())
  98       break;
  99     V = O;
 100   } while (1);
 101   return V;
 102 }
 103
 104 /// getUnderlyingObjectForInstr - If this machine instr has memory reference
 105 /// information and it can be tracked to a normal reference to a known
 106 /// object, return the Value for that object. Otherwise return null.
 107 static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
 108                                                 const MachineFrameInfo *MFI,
 109                                                 bool &MayAlias) {
 110   MayAlias = true;
 111   if (!MI->hasOneMemOperand() ||
 112       !(*MI->memoperands_begin())->getValue() ||
 113       (*MI->memoperands_begin())->isVolatile())
 114     return 0;
 115
 116   const Value *V = (*MI->memoperands_begin())->getValue();
 117   if (!V)
 118     return 0;
 119
 120   V = getUnderlyingObject(V);
 121   if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
 122     // For now, ignore PseudoSourceValues which may alias LLVM IR values
 123     // because the code that uses this function has no way to cope with
 124     // such aliases.
 125     if (PSV->isAliased(MFI))
 126       return 0;
 127
 128     MayAlias = PSV->mayAlias(MFI);
 129     return V;
 130   }
 131
 132   if (isIdentifiedObject(V))
 133     return V;
 134
 135   return 0;
 136 }
 137
 138 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
 139   BB = bb;
 140 }
 141
 142 void ScheduleDAGInstrs::finishBlock() {
 143   // Subclasses should no longer refer to the old block.
 144   BB = 0;
 145 }
 146
 147 /// Initialize the map with the number of registers.
 148 void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
 149   PhysRegSet.setUniverse(Limit);
 150   SUnits.resize(Limit);
 151 }
 152
 153 /// Clear the map without deallocating storage.
 154 void Reg2SUnitsMap::clear() {
 155   for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
 156     SUnits[*I].clear();
 157   }
 158   PhysRegSet.clear();
 159 }
 160
 161 /// Initialize the DAG and common scheduler state for the current scheduling
 162 /// region. This does not actually create the DAG, only clears it. The
 163 /// scheduling driver may call BuildSchedGraph multiple times per scheduling
 164 /// region.
 165 void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
 166                                     MachineBasicBlock::iterator begin,
 167                                     MachineBasicBlock::iterator end,
 168                                     unsigned endcount) {
 169   assert(bb == BB && "startBlock should set BB");
 170   RegionBegin = begin;
 171   RegionEnd = end;
 172   EndIndex = endcount;
 173   MISUnitMap.clear();
 174
 175   ScheduleDAG::clearDAG();
 176 }
 177
 178 /// Close the current scheduling region. Don't clear any state in case the
 179 /// driver wants to refer to the previous scheduling region.
 180 void ScheduleDAGInstrs::exitRegion() {
 181   // Nothing to do.
 182 }
 183
 184 /// addSchedBarrierDeps - Add dependencies from instructions in the current
 185 /// list of instructions being scheduled to scheduling barrier by adding
 186 /// the exit SU to the register defs and use list. This is because we want to
 187 /// make sure instructions which define registers that are either used by
 188 /// the terminator or are live-out are properly scheduled. This is
 189 /// especially important when the definition latency of the return value(s)
 190 /// are too high to be hidden by the branch or when the liveout registers
 191 /// used by instructions in the fallthrough block.
 192 void ScheduleDAGInstrs::addSchedBarrierDeps() {
 193   MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
 194   ExitSU.setInstr(ExitMI);
 195   bool AllDepKnown = ExitMI &&
 196     (ExitMI->isCall() || ExitMI->isBarrier());
 197   if (ExitMI && AllDepKnown) {
 198     // If it's a call or a barrier, add dependencies on the defs and uses of
 199     // instruction.
 200     for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
 201       const MachineOperand &MO = ExitMI->getOperand(i);
 202       if (!MO.isReg() || MO.isDef()) continue;
 203       unsigned Reg = MO.getReg();
 204       if (Reg == 0) continue;
 205
 206       if (TRI->isPhysicalRegister(Reg))
 207         Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
 208       else {
 209         assert(!IsPostRA && "Virtual register encountered after regalloc.");
 210         addVRegUseDeps(&ExitSU, i);
 211       }
 212     }
 213   } else {
 214     // For others, e.g. fallthrough, conditional branch, assume the exit
 215     // uses all the registers that are livein to the successor blocks.
 216     assert(Uses.empty() && "Uses in set before adding deps?");
 217     for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
 218            SE = BB->succ_end(); SI != SE; ++SI)
 219       for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
 220              E = (*SI)->livein_end(); I != E; ++I) {
 221         unsigned Reg = *I;
 222         if (!Uses.contains(Reg))
 223           Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
 224       }
 225   }
 226 }
 227
 228 /// MO is an operand of SU's instruction that defines a physical register. Add
 229 /// data dependencies from SU to any uses of the physical register.
 230 void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
 231   const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
 232   assert(MO.isDef() && "expect physreg def");
 233
 234   // Ask the target if address-backscheduling is desirable, and if so how much.
 235   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
 236
 237   for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
 238        Alias.isValid(); ++Alias) {
 239     if (!Uses.contains(*Alias))
 240       continue;
 241     std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
 242     for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
 243       SUnit *UseSU = UseList[i].SU;
 244       if (UseSU == SU)
 245         continue;
 246
 247       SDep dep(SU, SDep::Data, 1, *Alias);
 248
 249       // Adjust the dependence latency using operand def/use information,
 250       // then allow the target to perform its own adjustments.
 251       int UseOp = UseList[i].OpIdx;
 252       MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
 253       dep.setLatency(
 254         SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
 255                                          RegUse, UseOp, /*FindMin=*/false));
 256       dep.setMinLatency(
 257         SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
 258                                          RegUse, UseOp, /*FindMin=*/true));
 259
 260       ST.adjustSchedDependency(SU, UseSU, dep);
 261       UseSU->addPred(dep);
 262     }
 263   }
 264 }
 265
 266 /// addPhysRegDeps - Add register dependencies (data, anti, and output) from
 267 /// this SUnit to following instructions in the same scheduling region that
 268 /// depend the physical register referenced at OperIdx.
 269 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
 270   const MachineInstr *MI = SU->getInstr();
 271   const MachineOperand &MO = MI->getOperand(OperIdx);
 272
 273   // Optionally add output and anti dependencies. For anti
 274   // dependencies we use a latency of 0 because for a multi-issue
 275   // target we want to allow the defining instruction to issue
 276   // in the same cycle as the using instruction.
 277   // TODO: Using a latency of 1 here for output dependencies assumes
 278   //       there's no cost for reusing registers.
 279   SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
 280   for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
 281        Alias.isValid(); ++Alias) {
 282     if (!Defs.contains(*Alias))
 283       continue;
 284     std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
 285     for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
 286       SUnit *DefSU = DefList[i].SU;
 287       if (DefSU == &ExitSU)
 288         continue;
 289       if (DefSU != SU &&
 290           (Kind != SDep::Output || !MO.isDead() ||
 291            !DefSU->getInstr()->registerDefIsDead(*Alias))) {
 292         if (Kind == SDep::Anti)
 293           DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
 294         else {
 295           unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
 296                                                  DefSU->getInstr());
 297           DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
 298         }
 299       }
 300     }
 301   }
 302
 303   if (!MO.isDef()) {
 304     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
 305     // retrieve the existing SUnits list for this register's uses.
 306     // Push this SUnit on the use list.
 307     Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
 308   }
 309   else {
 310     addPhysRegDataDeps(SU, OperIdx);
 311
 312     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
 313     // retrieve the existing SUnits list for this register's defs.
 314     std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
 315
 316     // clear this register's use list
 317     if (Uses.contains(MO.getReg()))
 318       Uses[MO.getReg()].clear();
 319
 320     if (!MO.isDead())
 321       DefList.clear();
 322
 323     // Calls will not be reordered because of chain dependencies (see
 324     // below). Since call operands are dead, calls may continue to be added
 325     // to the DefList making dependence checking quadratic in the size of
 326     // the block. Instead, we leave only one call at the back of the
 327     // DefList.
 328     if (SU->isCall) {
 329       while (!DefList.empty() && DefList.back().SU->isCall)
 330         DefList.pop_back();
 331     }
 332     // Defs are pushed in the order they are visited and never reordered.
 333     DefList.push_back(PhysRegSUOper(SU, OperIdx));
 334   }
 335 }
 336
 337 /// addVRegDefDeps - Add register output and data dependencies from this SUnit
 338 /// to instructions that occur later in the same scheduling region if they read
 339 /// from or write to the virtual register defined at OperIdx.
 340 ///
 341 /// TODO: Hoist loop induction variable increments. This has to be
 342 /// reevaluated. Generally, IV scheduling should be done before coalescing.
 343 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
 344   const MachineInstr *MI = SU->getInstr();
 345   unsigned Reg = MI->getOperand(OperIdx).getReg();
 346
 347   // Singly defined vregs do not have output/anti dependencies.
 348   // The current operand is a def, so we have at least one.
 349   // Check here if there are any others...
 350   if (MRI.hasOneDef(Reg))
 351     return;
 352
 353   // Add output dependence to the next nearest def of this vreg.
 354   //
 355   // Unless this definition is dead, the output dependence should be
 356   // transitively redundant with antidependencies from this definition's
 357   // uses. We're conservative for now until we have a way to guarantee the uses
 358   // are not eliminated sometime during scheduling. The output dependence edge
 359   // is also useful if output latency exceeds def-use latency.
 360   VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
 361   if (DefI == VRegDefs.end())
 362     VRegDefs.insert(VReg2SUnit(Reg, SU));
 363   else {
 364     SUnit *DefSU = DefI->SU;
 365     if (DefSU != SU && DefSU != &ExitSU) {
 366       unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
 367                                                   DefSU->getInstr());
 368       DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
 369     }
 370     DefI->SU = SU;
 371   }
 372 }
 373
 374 /// addVRegUseDeps - Add a register data dependency if the instruction that
 375 /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
 376 /// register antidependency from this SUnit to instructions that occur later in
 377 /// the same scheduling region if they write the virtual register.
 378 ///
 379 /// TODO: Handle ExitSU "uses" properly.
 380 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
 381   MachineInstr *MI = SU->getInstr();
 382   unsigned Reg = MI->getOperand(OperIdx).getReg();
 383
 384   // Lookup this operand's reaching definition.
 385   assert(LIS && "vreg dependencies requires LiveIntervals");
 386   LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
 387   VNInfo *VNI = LRQ.valueIn();
 388
 389   // VNI will be valid because MachineOperand::readsReg() is checked by caller.
 390   assert(VNI && "No value to read by operand");
 391   MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
 392   // Phis and other noninstructions (after coalescing) have a NULL Def.
 393   if (Def) {
 394     SUnit *DefSU = getSUnit(Def);
 395     if (DefSU) {
 396       // The reaching Def lives within this scheduling region.
 397       // Create a data dependence.
 398       SDep dep(DefSU, SDep::Data, 1, Reg);
 399       // Adjust the dependence latency using operand def/use information, then
 400       // allow the target to perform its own adjustments.
 401       int DefOp = Def->findRegisterDefOperandIdx(Reg);
 402       dep.setLatency(
 403         SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
 404       dep.setMinLatency(
 405         SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
 406
 407       const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
 408       ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
 409       SU->addPred(dep);
 410     }
 411   }
 412
 413   // Add antidependence to the following def of the vreg it uses.
 414   VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
 415   if (DefI != VRegDefs.end() && DefI->SU != SU)
 416     DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
 417 }
 418
 419 /// Return true if MI is an instruction we are unable to reason about
 420 /// (like a call or something with unmodeled side effects).
 421 static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
 422   if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
 423       (MI->hasOrderedMemoryRef() &&
 424        (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
 425     return true;
 426   return false;
 427 }
 428
 429 // This MI might have either incomplete info, or known to be unsafe
 430 // to deal with (i.e. volatile object).
 431 static inline bool isUnsafeMemoryObject(MachineInstr *MI,
 432                                         const MachineFrameInfo *MFI) {
 433   if (!MI || MI->memoperands_empty())
 434     return true;
 435   // We purposefully do no check for hasOneMemOperand() here
 436   // in hope to trigger an assert downstream in order to
 437   // finish implementation.
 438   if ((*MI->memoperands_begin())->isVolatile() ||
 439        MI->hasUnmodeledSideEffects())
 440     return true;
 441
 442   const Value *V = (*MI->memoperands_begin())->getValue();
 443   if (!V)
 444     return true;
 445
 446   V = getUnderlyingObject(V);
 447   if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
 448     // Similarly to getUnderlyingObjectForInstr:
 449     // For now, ignore PseudoSourceValues which may alias LLVM IR values
 450     // because the code that uses this function has no way to cope with
 451     // such aliases.
 452     if (PSV->isAliased(MFI))
 453       return true;
 454   }
 455   // Does this pointer refer to a distinct and identifiable object?
 456   if (!isIdentifiedObject(V))
 457     return true;
 458
 459   return false;
 460 }
 461
 462 /// This returns true if the two MIs need a chain edge betwee them.
 463 /// If these are not even memory operations, we still may need
 464 /// chain deps between them. The question really is - could
 465 /// these two MIs be reordered during scheduling from memory dependency
 466 /// point of view.
 467 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
 468                              MachineInstr *MIa,
 469                              MachineInstr *MIb) {
 470   // Cover a trivial case - no edge is need to itself.
 471   if (MIa == MIb)
 472     return false;
 473
 474   if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
 475     return true;
 476
 477   // If we are dealing with two "normal" loads, we do not need an edge
 478   // between them - they could be reordered.
 479   if (!MIa->mayStore() && !MIb->mayStore())
 480     return false;
 481
 482   // To this point analysis is generic. From here on we do need AA.
 483   if (!AA)
 484     return true;
 485
 486   MachineMemOperand *MMOa = *MIa->memoperands_begin();
 487   MachineMemOperand *MMOb = *MIb->memoperands_begin();
 488
 489   // FIXME: Need to handle multiple memory operands to support all targets.
 490   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
 491     llvm_unreachable("Multiple memory operands.");
 492
 493   // The following interface to AA is fashioned after DAGCombiner::isAlias
 494   // and operates with MachineMemOperand offset with some important
 495   // assumptions:
 496   //   - LLVM fundamentally assumes flat address spaces.
 497   //   - MachineOperand offset can *only* result from legalization and
 498   //     cannot affect queries other than the trivial case of overlap
 499   //     checking.
 500   //   - These offsets never wrap and never step outside
 501   //     of allocated objects.
 502   //   - There should never be any negative offsets here.
 503   //
 504   // FIXME: Modify API to hide this math from "user"
 505   // FIXME: Even before we go to AA we can reason locally about some
 506   // memory objects. It can save compile time, and possibly catch some
 507   // corner cases not currently covered.
 508
 509   assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
 510   assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
 511
 512   int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
 513   int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
 514   int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
 515
 516   AliasAnalysis::AliasResult AAResult = AA->alias(
 517   AliasAnalysis::Location(MMOa->getValue(), Overlapa,
 518                           MMOa->getTBAAInfo()),
 519   AliasAnalysis::Location(MMOb->getValue(), Overlapb,
 520                           MMOb->getTBAAInfo()));
 521
 522   return (AAResult != AliasAnalysis::NoAlias);
 523 }
 524
 525 /// This recursive function iterates over chain deps of SUb looking for
 526 /// "latest" node that needs a chain edge to SUa.
 527 static unsigned
 528 iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
 529                  SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
 530                  SmallPtrSet<const SUnit*, 16> &Visited) {
 531   if (!SUa || !SUb || SUb == ExitSU)
 532     return *Depth;
 533
 534   // Remember visited nodes.
 535   if (!Visited.insert(SUb))
 536       return *Depth;
 537   // If there is _some_ dependency already in place, do not
 538   // descend any further.
 539   // TODO: Need to make sure that if that dependency got eliminated or ignored
 540   // for any reason in the future, we would not violate DAG topology.
 541   // Currently it does not happen, but makes an implicit assumption about
 542   // future implementation.
 543   //
 544   // Independently, if we encounter node that is some sort of global
 545   // object (like a call) we already have full set of dependencies to it
 546   // and we can stop descending.
 547   if (SUa->isSucc(SUb) ||
 548       isGlobalMemoryObject(AA, SUb->getInstr()))
 549     return *Depth;
 550
 551   // If we do need an edge, or we have exceeded depth budget,
 552   // add that edge to the predecessors chain of SUb,
 553   // and stop descending.
 554   if (*Depth > 200 ||
 555       MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
 556     SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
 557                       /*isNormalMemory=*/true));
 558     return *Depth;
 559   }
 560   // Track current depth.
 561   (*Depth)++;
 562   // Iterate over chain dependencies only.
 563   for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
 564        I != E; ++I)
 565     if (I->isCtrl())
 566       iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
 567   return *Depth;
 568 }
 569
 570 /// This function assumes that "downward" from SU there exist
 571 /// tail/leaf of already constructed DAG. It iterates downward and
 572 /// checks whether SU can be aliasing any node dominated
 573 /// by it.
 574 static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
 575                             SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
 576                             unsigned LatencyToLoad) {
 577   if (!SU)
 578     return;
 579
 580   SmallPtrSet<const SUnit*, 16> Visited;
 581   unsigned Depth = 0;
 582
 583   for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
 584        I != IE; ++I) {
 585     if (SU == *I)
 586       continue;
 587     if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
 588       unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
 589       (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
 590                          /*isNormalMemory=*/true));
 591     }
 592     // Now go through all the chain successors and iterate from them.
 593     // Keep track of visited nodes.
 594     for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
 595          JE = (*I)->Succs.end(); J != JE; ++J)
 596       if (J->isCtrl())
 597         iterateChainSucc (AA, MFI, SU, J->getSUnit(),
 598                           ExitSU, &Depth, Visited);
 599   }
 600 }
 601
 602 /// Check whether two objects need a chain edge, if so, add it
 603 /// otherwise remember the rejected SU.
 604 static inline
 605 void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
 606                          SUnit *SUa, SUnit *SUb,
 607                          std::set<SUnit *> &RejectList,
 608                          unsigned TrueMemOrderLatency = 0,
 609                          bool isNormalMemory = false) {
 610   // If this is a false dependency,
 611   // do not add the edge, but rememeber the rejected node.
 612   if (!EnableAASchedMI ||
 613       MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
 614     SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
 615                       isNormalMemory));
 616   else {
 617     // Duplicate entries should be ignored.
 618     RejectList.insert(SUb);
 619     DEBUG(dbgs() << "\tReject chain dep between SU("
 620           << SUa->NodeNum << ") and SU("
 621           << SUb->NodeNum << ")\n");
 622   }
 623 }
 624
 625 /// Create an SUnit for each real instruction, numbered in top-down toplological
 626 /// order. The instruction order A < B, implies that no edge exists from B to A.
 627 ///
 628 /// Map each real instruction to its SUnit.
 629 ///
 630 /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
 631 /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
 632 /// instead of pointers.
 633 ///
 634 /// MachineScheduler relies on initSUnits numbering the nodes by their order in
 635 /// the original instruction list.
 636 void ScheduleDAGInstrs::initSUnits() {
 637   // We'll be allocating one SUnit for each real instruction in the region,
 638   // which is contained within a basic block.
 639   SUnits.reserve(BB->size());
 640
 641   for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
 642     MachineInstr *MI = I;
 643     if (MI->isDebugValue())
 644       continue;
 645
 646     SUnit *SU = newSUnit(MI);
 647     MISUnitMap[MI] = SU;
 648
 649     SU->isCall = MI->isCall();
 650     SU->isCommutable = MI->isCommutable();
 651
 652     // Assign the Latency field of SU using target-provided information.
 653     computeLatency(SU);
 654   }
 655 }
 656
 657 /// If RegPressure is non null, compute register pressure as a side effect. The
 658 /// DAG builder is an efficient place to do it because it already visits
 659 /// operands.
 660 void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 661                                         RegPressureTracker *RPTracker) {
 662   // Create an SUnit for each real instruction.
 663   initSUnits();
 664
 665   // We build scheduling units by walking a block's instruction list from bottom
 666   // to top.
 667
 668   // Remember where a generic side-effecting instruction is as we procede.
 669   SUnit *BarrierChain = 0, *AliasChain = 0;
 670
 671   // Memory references to specific known memory locations are tracked
 672   // so that they can be given more precise dependencies. We track
 673   // separately the known memory locations that may alias and those
 674   // that are known not to alias
 675   std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
 676   std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
 677   std::set<SUnit*> RejectMemNodes;
 678
 679   // Remove any stale debug info; sometimes BuildSchedGraph is called again
 680   // without emitting the info from the previous call.
 681   DbgValues.clear();
 682   FirstDbgValue = NULL;
 683
 684   assert(Defs.empty() && Uses.empty() &&
 685          "Only BuildGraph should update Defs/Uses");
 686   Defs.setRegLimit(TRI->getNumRegs());
 687   Uses.setRegLimit(TRI->getNumRegs());
 688
 689   assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
 690   // FIXME: Allow SparseSet to reserve space for the creation of virtual
 691   // registers during scheduling. Don't artificially inflate the Universe
 692   // because we want to assert that vregs are not created during DAG building.
 693   VRegDefs.setUniverse(MRI.getNumVirtRegs());
 694
 695   // Model data dependencies between instructions being scheduled and the
 696   // ExitSU.
 697   addSchedBarrierDeps();
 698
 699   // Walk the list of instructions, from bottom moving up.
 700   MachineInstr *PrevMI = NULL;
 701   for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
 702        MII != MIE; --MII) {
 703     MachineInstr *MI = prior(MII);
 704     if (MI && PrevMI) {
 705       DbgValues.push_back(std::make_pair(PrevMI, MI));
 706       PrevMI = NULL;
 707     }
 708
 709     if (MI->isDebugValue()) {
 710       PrevMI = MI;
 711       continue;
 712     }
 713     if (RPTracker) {
 714       RPTracker->recede();
 715       assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
 716     }
 717
 718     assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
 719            "Cannot schedule terminators or labels!");
 720
 721     SUnit *SU = MISUnitMap[MI];
 722     assert(SU && "No SUnit mapped to this MI");
 723
 724     // Add register-based dependencies (data, anti, and output).
 725     for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
 726       const MachineOperand &MO = MI->getOperand(j);
 727       if (!MO.isReg()) continue;
 728       unsigned Reg = MO.getReg();
 729       if (Reg == 0) continue;
 730
 731       if (TRI->isPhysicalRegister(Reg))
 732         addPhysRegDeps(SU, j);
 733       else {
 734         assert(!IsPostRA && "Virtual register encountered!");
 735         if (MO.isDef())
 736           addVRegDefDeps(SU, j);
 737         else if (MO.readsReg()) // ignore undef operands
 738           addVRegUseDeps(SU, j);
 739       }
 740     }
 741
 742     // Add chain dependencies.
 743     // Chain dependencies used to enforce memory order should have
 744     // latency of 0 (except for true dependency of Store followed by
 745     // aliased Load... we estimate that with a single cycle of latency
 746     // assuming the hardware will bypass)
 747     // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
 748     // after stack slots are lowered to actual addresses.
 749     // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
 750     // produce more precise dependence information.
 751     unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
 752     if (isGlobalMemoryObject(AA, MI)) {
 753       // Be conservative with these and add dependencies on all memory
 754       // references, even those that are known to not alias.
 755       for (std::map<const Value *, SUnit *>::iterator I =
 756              NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
 757         I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
 758       }
 759       for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
 760              NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
 761         for (unsigned i = 0, e = I->second.size(); i != e; ++i)
 762           I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
 763       }
 764       // Add SU to the barrier chain.
 765       if (BarrierChain)
 766         BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
 767       BarrierChain = SU;
 768       // This is a barrier event that acts as a pivotal node in the DAG,
 769       // so it is safe to clear list of exposed nodes.
 770       adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
 771                       TrueMemOrderLatency);
 772       RejectMemNodes.clear();
 773       NonAliasMemDefs.clear();
 774       NonAliasMemUses.clear();
 775
 776       // fall-through
 777     new_alias_chain:
 778       // Chain all possibly aliasing memory references though SU.
 779       if (AliasChain) {
 780         unsigned ChainLatency = 0;
 781         if (AliasChain->getInstr()->mayLoad())
 782           ChainLatency = TrueMemOrderLatency;
 783         addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
 784                            ChainLatency);
 785       }
 786       AliasChain = SU;
 787       for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
 788         addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
 789                            TrueMemOrderLatency);
 790       for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
 791            E = AliasMemDefs.end(); I != E; ++I)
 792         addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
 793       for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
 794            AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
 795         for (unsigned i = 0, e = I->second.size(); i != e; ++i)
 796           addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
 797                              TrueMemOrderLatency);
 798       }
 799       adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
 800                       TrueMemOrderLatency);
 801       PendingLoads.clear();
 802       AliasMemDefs.clear();
 803       AliasMemUses.clear();
 804     } else if (MI->mayStore()) {
 805       bool MayAlias = true;
 806       if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
 807         // A store to a specific PseudoSourceValue. Add precise dependencies.
 808         // Record the def in MemDefs, first adding a dep if there is
 809         // an existing def.
 810         std::map<const Value *, SUnit *>::iterator I =
 811           ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
 812         std::map<const Value *, SUnit *>::iterator IE =
 813           ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
 814         if (I != IE) {
 815           addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
 816                              0, true);
 817           I->second = SU;
 818         } else {
 819           if (MayAlias)
 820             AliasMemDefs[V] = SU;
 821           else
 822             NonAliasMemDefs[V] = SU;
 823         }
 824         // Handle the uses in MemUses, if there are any.
 825         std::map<const Value *, std::vector<SUnit *> >::iterator J =
 826           ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
 827         std::map<const Value *, std::vector<SUnit *> >::iterator JE =
 828           ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
 829         if (J != JE) {
 830           for (unsigned i = 0, e = J->second.size(); i != e; ++i)
 831             addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
 832                                TrueMemOrderLatency, true);
 833           J->second.clear();
 834         }
 835         if (MayAlias) {
 836           // Add dependencies from all the PendingLoads, i.e. loads
 837           // with no underlying object.
 838           for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
 839             addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
 840                                TrueMemOrderLatency);
 841           // Add dependence on alias chain, if needed.
 842           if (AliasChain)
 843             addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
 844           // But we also should check dependent instructions for the
 845           // SU in question.
 846           adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
 847                           TrueMemOrderLatency);
 848         }
 849         // Add dependence on barrier chain, if needed.
 850         // There is no point to check aliasing on barrier event. Even if
 851         // SU and barrier _could_ be reordered, they should not. In addition,
 852         // we have lost all RejectMemNodes below barrier.
 853         if (BarrierChain)
 854           BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
 855       } else {
 856         // Treat all other stores conservatively.
 857         goto new_alias_chain;
 858       }
 859
 860       if (!ExitSU.isPred(SU))
 861         // Push store's up a bit to avoid them getting in between cmp
 862         // and branches.
 863         ExitSU.addPred(SDep(SU, SDep::Order, 0,
 864                             /*Reg=*/0, /*isNormalMemory=*/false,
 865                             /*isMustAlias=*/false,
 866                             /*isArtificial=*/true));
 867     } else if (MI->mayLoad()) {
 868       bool MayAlias = true;
 869       if (MI->isInvariantLoad(AA)) {
 870         // Invariant load, no chain dependencies needed!
 871       } else {
 872         if (const Value *V =
 873             getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
 874           // A load from a specific PseudoSourceValue. Add precise dependencies.
 875           std::map<const Value *, SUnit *>::iterator I =
 876             ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
 877           std::map<const Value *, SUnit *>::iterator IE =
 878             ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
 879           if (I != IE)
 880             addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
 881           if (MayAlias)
 882             AliasMemUses[V].push_back(SU);
 883           else
 884             NonAliasMemUses[V].push_back(SU);
 885         } else {
 886           // A load with no underlying object. Depend on all
 887           // potentially aliasing stores.
 888           for (std::map<const Value *, SUnit *>::iterator I =
 889                  AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
 890             addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
 891
 892           PendingLoads.push_back(SU);
 893           MayAlias = true;
 894         }
 895         if (MayAlias)
 896           adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
 897         // Add dependencies on alias and barrier chains, if needed.
 898         if (MayAlias && AliasChain)
 899           addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
 900         if (BarrierChain)
 901           BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
 902       }
 903     }
 904   }
 905   if (PrevMI)
 906     FirstDbgValue = PrevMI;
 907
 908   Defs.clear();
 909   Uses.clear();
 910   VRegDefs.clear();
 911   PendingLoads.clear();
 912 }
 913
 914 void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
 915   // Compute the latency for the node. We only provide a default for missing
 916   // itineraries. Empty itineraries still have latency properties.
 917   if (!InstrItins) {
 918     SU->Latency = 1;
 919
 920     // Simplistic target-independent heuristic: assume that loads take
 921     // extra time.
 922     if (SU->getInstr()->mayLoad())
 923       SU->Latency += 2;
 924   } else {
 925     SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
 926   }
 927 }
 928
 929 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
 930 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 931   SU->getInstr()->dump();
 932 #endif
 933 }
 934
 935 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
 936   std::string s;
 937   raw_string_ostream oss(s);
 938   if (SU == &EntrySU)
 939     oss << "<entry>";
 940   else if (SU == &ExitSU)
 941     oss << "<exit>";
 942   else
 943     SU->getInstr()->print(oss);
 944   return oss.str();
 945 }
 946
 947 /// Return the basic block label. It is not necessarilly unique because a block
 948 /// contains multiple scheduling regions. But it is fine for visualization.
 949 std::string ScheduleDAGInstrs::getDAGName() const {
 950   return "dag." + BB->getFullName();
 951 }