Force coalescing of live ranges that have a single definition, even if they
[oota-llvm.git] / lib / CodeGen / LiveIntervalAnalysis.cpp
index f1bb499337071cf2ad23d70339140e456e8251d7..4e5b1804475360616d2f71d11f6e003817230d5f 100644 (file)
@@ -16,7 +16,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "liveintervals"
-#include "llvm/CodeGen/LiveIntervals.h"
+#include "LiveIntervals.h"
+#include "llvm/Value.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/Target/MRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/CFG.h"
 #include "Support/CommandLine.h"
 #include "Support/Debug.h"
 #include "Support/Statistic.h"
+#include "Support/STLExtras.h"
+#include "VirtRegMap.h"
 #include <cmath>
-#include <iostream>
-#include <limits>
 
 using namespace llvm;
 
@@ -40,13 +40,25 @@ namespace {
     RegisterAnalysis<LiveIntervals> X("liveintervals",
                                       "Live Interval Analysis");
 
-    Statistic<> numIntervals("liveintervals", "Number of intervals");
-    Statistic<> numJoined   ("liveintervals", "Number of joined intervals");
+    Statistic<> numIntervals
+    ("liveintervals", "Number of original intervals");
+
+    Statistic<> numIntervalsAfter
+    ("liveintervals", "Number of intervals after coalescing");
+
+    Statistic<> numJoins
+    ("liveintervals", "Number of interval joins performed");
+
+    Statistic<> numPeep
+    ("liveintervals", "Number of identity moves eliminated after coalescing");
+
+    Statistic<> numFolded
+    ("liveintervals", "Number of loads/stores folded into instructions");
 
     cl::opt<bool>
-    join("join-liveintervals",
-         cl::desc("Join compatible live intervals"),
-         cl::init(true));
+    EnableJoining("join-liveintervals",
+                  cl::desc("Join compatible live intervals"),
+                  cl::init(true));
 };
 
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const
@@ -62,9 +74,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const
 
 void LiveIntervals::releaseMemory()
 {
-    mbbi2mbbMap_.clear();
     mi2iMap_.clear();
-    r2iMap_.clear();
+    i2miMap_.clear();
     r2iMap_.clear();
     r2rMap_.clear();
     intervals_.clear();
@@ -74,7 +85,6 @@ void LiveIntervals::releaseMemory()
 /// runOnMachineFunction - Register allocate the whole function
 ///
 bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
-    DEBUG(std::cerr << "Machine Function\n");
     mf_ = &fn;
     tm_ = &fn.getTarget();
     mri_ = tm_->getRegisterInfo();
@@ -83,156 +93,316 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
     // number MachineInstrs
     unsigned miIndex = 0;
     for (MachineFunction::iterator mbb = mf_->begin(), mbbEnd = mf_->end();
-         mbb != mbbEnd; ++mbb) {
-        const std::pair<MachineBasicBlock*, unsigned>& entry =
-            lv_->getMachineBasicBlockInfo(mbb);
-        bool inserted = mbbi2mbbMap_.insert(std::make_pair(entry.second,
-                                                           entry.first)).second;
-        assert(inserted && "multiple index -> MachineBasicBlock");
-
+         mbb != mbbEnd; ++mbb)
         for (MachineBasicBlock::iterator mi = mbb->begin(), miEnd = mbb->end();
              mi != miEnd; ++mi) {
-            inserted = mi2iMap_.insert(std::make_pair(*mi, miIndex)).second;
+            bool inserted = mi2iMap_.insert(std::make_pair(mi, miIndex)).second;
             assert(inserted && "multiple MachineInstr -> index mappings");
-            ++miIndex;
+            i2miMap_.push_back(mi);
+            miIndex += InstrSlots::NUM;
         }
-    }
 
     computeIntervals();
 
-    // compute spill weights
+    numIntervals += intervals_.size();
+
+    // join intervals if requested
+    if (EnableJoining) joinIntervals();
+
+    numIntervalsAfter += intervals_.size();
+
+    // perform a final pass over the instructions and compute spill
+    // weights, coalesce virtual registers and remove identity moves
     const LoopInfo& loopInfo = getAnalysis<LoopInfo>();
-    const TargetInstrInfo& tii = tm_->getInstrInfo();
+    const TargetInstrInfo& tii = *tm_->getInstrInfo();
 
-    for (MachineFunction::const_iterator mbbi = mf_->begin(),
-             mbbe = mf_->end(); mbbi != mbbe; ++mbbi) {
-        const MachineBasicBlock* mbb = mbbi;
+    for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+         mbbi != mbbe; ++mbbi) {
+        MachineBasicBlock* mbb = mbbi;
         unsigned loopDepth = loopInfo.getLoopDepth(mbb->getBasicBlock());
 
-        for (MachineBasicBlock::const_iterator mii = mbb->begin(),
-                 mie = mbb->end(); mii != mie; ++mii) {
-            MachineInstr* mi = *mii;
-
-            for (int i = mi->getNumOperands() - 1; i >= 0; --i) {
-                MachineOperand& mop = mi->getOperand(i);
-                if (mop.isVirtualRegister()) {
-                    unsigned reg = mop.getAllocatedRegNum();
-                    Reg2IntervalMap::iterator r2iit = r2iMap_.find(reg);
-                    assert(r2iit != r2iMap_.end());
-                    r2iit->second->weight += pow(10.0F, loopDepth);
+        for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+             mii != mie; ) {
+            // if the move will be an identity move delete it
+            unsigned srcReg, dstReg;
+            if (tii.isMoveInstr(*mii, srcReg, dstReg) &&
+                rep(srcReg) == rep(dstReg)) {
+                // remove from def list
+                LiveInterval& interval = getOrCreateInterval(rep(dstReg));
+                // remove index -> MachineInstr and
+                // MachineInstr -> index mappings
+                Mi2IndexMap::iterator mi2i = mi2iMap_.find(mii);
+                if (mi2i != mi2iMap_.end()) {
+                    i2miMap_[mi2i->second/InstrSlots::NUM] = 0;
+                    mi2iMap_.erase(mi2i);
+                }
+                mii = mbbi->erase(mii);
+                ++numPeep;
+            }
+            else {
+                for (unsigned i = 0; i < mii->getNumOperands(); ++i) {
+                    const MachineOperand& mop = mii->getOperand(i);
+                    if (mop.isRegister() && mop.getReg() &&
+                        MRegisterInfo::isVirtualRegister(mop.getReg())) {
+                        // replace register with representative register
+                        unsigned reg = rep(mop.getReg());
+                        mii->SetMachineOperandReg(i, reg);
+
+                        Reg2IntervalMap::iterator r2iit = r2iMap_.find(reg);
+                        assert(r2iit != r2iMap_.end());
+                        r2iit->second->weight +=
+                            (mop.isUse() + mop.isDef()) * pow(10.0F, loopDepth);
+                    }
                 }
+                ++mii;
             }
         }
     }
 
-    // join intervals if requested
-    if (join) joinIntervals();
-
-    numIntervals += intervals_.size();
-
-    intervals_.sort(StartPointComp());
+    DEBUG(std::cerr << "********** INTERVALS **********\n");
     DEBUG(std::copy(intervals_.begin(), intervals_.end(),
-                    std::ostream_iterator<Interval>(std::cerr, "\n")));
+                    std::ostream_iterator<LiveInterval>(std::cerr, "\n")));
+    DEBUG(std::cerr << "********** MACHINEINSTRS **********\n");
+    DEBUG(
+        for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+             mbbi != mbbe; ++mbbi) {
+            std::cerr << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+            for (MachineBasicBlock::iterator mii = mbbi->begin(),
+                     mie = mbbi->end(); mii != mie; ++mii) {
+                std::cerr << getInstructionIndex(mii) << '\t';
+                mii->print(std::cerr, tm_);
+            }
+        });
+
     return true;
 }
 
-void LiveIntervals::printRegName(unsigned reg) const
+std::vector<LiveInterval*> LiveIntervals::addIntervalsForSpills(
+    const LiveInterval& li,
+    VirtRegMap& vrm,
+    int slot)
 {
-    if (MRegisterInfo::isPhysicalRegister(reg))
-        std::cerr << mri_->getName(reg);
-    else
-        std::cerr << '%' << reg;
-}
+    std::vector<LiveInterval*> added;
 
-void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock* mbb,
-                                             MachineBasicBlock::iterator mi,
-                                             unsigned reg)
-{
-    DEBUG(std::cerr << "\t\tregister: ";printRegName(reg); std::cerr << '\n');
+    assert(li.weight != HUGE_VAL &&
+           "attempt to spill already spilled interval!");
 
-    unsigned instrIndex = getInstructionIndex(*mi);
+    DEBUG(std::cerr << "\t\t\t\tadding intervals for spills for interval: "
+          << li << '\n');
 
-    LiveVariables::VarInfo& vi = lv_->getVarInfo(reg);
+    const TargetRegisterClass* rc = mf_->getSSARegMap()->getRegClass(li.reg);
 
-    Interval* interval = 0;
-    Reg2IntervalMap::iterator r2iit = r2iMap_.lower_bound(reg);
-    if (r2iit == r2iMap_.end() || r2iit->first != reg) {
-        // add new interval
-        intervals_.push_back(Interval(reg));
-        // update interval index for this register
-        r2iMap_.insert(r2iit, std::make_pair(reg, --intervals_.end()));
-        interval = &intervals_.back();
-
-        // iterate over all of the blocks that the variable is
-        // completely live in, adding them to the live
-        // interval. obviously we only need to do this once.
-        for (unsigned i = 0, e = vi.AliveBlocks.size(); i != e; ++i) {
-            if (vi.AliveBlocks[i]) {
-                MachineBasicBlock* mbb = lv_->getIndexMachineBasicBlock(i);
-                if (!mbb->empty()) {
-                    interval->addRange(getInstructionIndex(mbb->front()),
-                                       getInstructionIndex(mbb->back()) + 1);
+    for (LiveInterval::Ranges::const_iterator
+              i = li.ranges.begin(), e = li.ranges.end(); i != e; ++i) {
+        unsigned index = getBaseIndex(i->first);
+        unsigned end = getBaseIndex(i->second-1) + InstrSlots::NUM;
+        for (; index != end; index += InstrSlots::NUM) {
+            // skip deleted instructions
+            while (index != end && !getInstructionFromIndex(index))
+                index += InstrSlots::NUM;
+            if (index == end) break;
+
+            MachineBasicBlock::iterator mi = getInstructionFromIndex(index);
+
+        for_operand:
+            for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+                MachineOperand& mop = mi->getOperand(i);
+                if (mop.isRegister() && mop.getReg() == li.reg) {
+                    if (MachineInstr* fmi =
+                        mri_->foldMemoryOperand(mi, i, slot)) {
+                        lv_->instructionChanged(mi, fmi);
+                        vrm.virtFolded(li.reg, mi, fmi);
+                        mi2iMap_.erase(mi);
+                        i2miMap_[index/InstrSlots::NUM] = fmi;
+                        mi2iMap_[fmi] = index;
+                        MachineBasicBlock& mbb = *mi->getParent();
+                        mi = mbb.insert(mbb.erase(mi), fmi);
+                        ++numFolded;
+                        goto for_operand;
+                    }
+                    else {
+                        // This is tricky. We need to add information in
+                        // the interval about the spill code so we have to
+                        // use our extra load/store slots.
+                        //
+                        // If we have a use we are going to have a load so
+                        // we start the interval from the load slot
+                        // onwards. Otherwise we start from the def slot.
+                        unsigned start = (mop.isUse() ?
+                                          getLoadIndex(index) :
+                                          getDefIndex(index));
+                        // If we have a def we are going to have a store
+                        // right after it so we end the interval after the
+                        // use of the next instruction. Otherwise we end
+                        // after the use of this instruction.
+                        unsigned end = 1 + (mop.isDef() ?
+                                            getStoreIndex(index) :
+                                            getUseIndex(index));
+
+                        // create a new register for this spill
+                        unsigned nReg =
+                            mf_->getSSARegMap()->createVirtualRegister(rc);
+                        mi->SetMachineOperandReg(i, nReg);
+                        vrm.grow();
+                        vrm.assignVirt2StackSlot(nReg, slot);
+                        LiveInterval& nI = getOrCreateInterval(nReg);
+                        assert(nI.empty());
+                        // the spill weight is now infinity as it
+                        // cannot be spilled again
+                        nI.weight = HUGE_VAL;
+                        nI.addRange(start, end);
+                        added.push_back(&nI);
+                        // update live variables
+                        lv_->addVirtualRegisterKilled(nReg, mi);
+                        DEBUG(std::cerr << "\t\t\t\tadded new interval: "
+                              << nI << '\n');
+                    }
                 }
             }
         }
     }
-    else {
-        interval = &*r2iit->second;
-    }
 
-    bool killedInDefiningBasicBlock = false;
-    for (int i = 0, e = vi.Kills.size(); i != e; ++i) {
-        MachineBasicBlock* killerBlock = vi.Kills[i].first;
-        MachineInstr* killerInstr = vi.Kills[i].second;
-        unsigned start = (mbb == killerBlock ?
-                          instrIndex :
-                          getInstructionIndex(killerBlock->front()));
-        unsigned end = getInstructionIndex(killerInstr) + 1;
-        // we do not want to add invalid ranges. these can happen when
-        // a variable has its latest use and is redefined later on in
-        // the same basic block (common with variables introduced by
-        // PHI elimination)
-        if (start < end) {
-            killedInDefiningBasicBlock |= mbb == killerBlock;
-            interval->addRange(start, end);
-        }
-    }
+    return added;
+}
+
+void LiveIntervals::printRegName(unsigned reg) const
+{
+    if (MRegisterInfo::isPhysicalRegister(reg))
+        std::cerr << mri_->getName(reg);
+    else
+        std::cerr << "%reg" << reg;
+}
 
-    if (!killedInDefiningBasicBlock) {
-        unsigned end = getInstructionIndex(mbb->back()) + 1;
-        interval->addRange(instrIndex, end);
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock* mbb,
+                                             MachineBasicBlock::iterator mi,
+                                             LiveInterval& interval)
+{
+    DEBUG(std::cerr << "\t\tregister: "; printRegName(interval.reg));
+    LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+
+    // Virtual registers may be defined multiple times (due to phi 
+    // elimination and 2-addr elimination).  Much of what we do only has to be 
+    // done once for the vreg.  We use an empty interval to detect the first 
+    // time we see a vreg.
+    if (interval.empty()) {
+       // Assume this interval is singly defined until we find otherwise.
+       interval.isDefinedOnce = true;
+
+       // Get the Idx of the defining instructions.
+       unsigned defIndex = getDefIndex(getInstructionIndex(mi));
+
+       // Loop over all of the blocks that the vreg is defined in.  There are
+       // two cases we have to handle here.  The most common case is a vreg
+       // whose lifetime is contained within a basic block.  In this case there
+       // will be a single kill, in MBB, which comes after the definition.
+       if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+           // FIXME: what about dead vars?
+           unsigned killIdx;
+           if (vi.Kills[0] != mi)
+               killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1;
+           else
+               killIdx = defIndex+1;
+
+           // If the kill happens after the definition, we have an intra-block
+           // live range.
+           if (killIdx > defIndex) {
+              assert(vi.AliveBlocks.empty() && 
+                     "Shouldn't be alive across any blocks!");
+              interval.addRange(defIndex, killIdx);
+              DEBUG(std::cerr << "\n");
+              return;
+           }
+       }
+
+       // The other case we handle is when a virtual register lives to the end
+       // of the defining block, potentially live across some blocks, then is
+       // live into some number of blocks, but gets killed.  Start by adding a
+       // range that goes from this definition to the end of the defining block.
+       interval.addRange(defIndex, 
+                         getInstructionIndex(&mbb->back()) + InstrSlots::NUM);
+
+       // Iterate over all of the blocks that the variable is completely
+       // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+       // live interval.
+       for (unsigned i = 0, e = vi.AliveBlocks.size(); i != e; ++i) {
+           if (vi.AliveBlocks[i]) {
+               MachineBasicBlock* mbb = mf_->getBlockNumbered(i);
+               if (!mbb->empty()) {
+                   interval.addRange(
+                       getInstructionIndex(&mbb->front()),
+                       getInstructionIndex(&mbb->back()) + InstrSlots::NUM);
+               }
+           }
+       }
+
+       // Finally, this virtual register is live from the start of any killing
+       // block to the 'use' slot of the killing instruction.
+       for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+           MachineInstr *Kill = vi.Kills[i];
+           interval.addRange(getInstructionIndex(Kill->getParent()->begin()),
+                             getUseIndex(getInstructionIndex(Kill))+1);
+       }
+
+    } else {
+       // If this is the second time we see a virtual register definition, it
+       // must be due to phi elimination or two addr elimination.  If this is
+       // the result of two address elimination, then the vreg is the first
+       // operand, and is a def-and-use.
+       if (mi->getOperand(0).isRegister() && 
+           mi->getOperand(0).getReg() == interval.reg &&
+           mi->getOperand(0).isDef() && mi->getOperand(0).isUse()) {
+         // If this is a two-address definition, just ignore it.
+       } else {
+         // Otherwise, this must be because of phi elimination.  In this case, 
+         // the defined value will be live until the end of the basic block it
+         // is defined in.
+         unsigned defIndex = getDefIndex(getInstructionIndex(mi));
+         interval.addRange(defIndex, 
+                           getInstructionIndex(&mbb->back()) + InstrSlots::NUM);
+       }
+       interval.isDefinedOnce = false;
     }
+
+    DEBUG(std::cerr << '\n');
 }
 
 void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock* mbb,
                                               MachineBasicBlock::iterator mi,
-                                              unsigned reg)
+                                              LiveInterval& interval)
 {
+    // A physical register cannot be live across basic blocks, so its
+    // lifetime must end somewhere in its defining basic block.
+    DEBUG(std::cerr << "\t\tregister: "; printRegName(interval.reg));
     typedef LiveVariables::killed_iterator KillIter;
 
-    DEBUG(std::cerr << "\t\tregister: "; printRegName(reg));
-
     MachineBasicBlock::iterator e = mbb->end();
-    unsigned start = getInstructionIndex(*mi);
-    unsigned end = start + 1;
-
-    // a variable can be dead by the instruction defining it
-    for (KillIter ki = lv_->dead_begin(*mi), ke = lv_->dead_end(*mi);
+    unsigned baseIndex = getInstructionIndex(mi);
+    unsigned start = getDefIndex(baseIndex);
+    unsigned end = start;
+
+    // If it is not used after definition, it is considered dead at
+    // the instruction defining it. Hence its interval is:
+    // [defSlot(def), defSlot(def)+1)
+    for (KillIter ki = lv_->dead_begin(mi), ke = lv_->dead_end(mi);
          ki != ke; ++ki) {
-        if (reg == ki->second) {
-            DEBUG(std::cerr << " dead\n");
+        if (interval.reg == ki->second) {
+            DEBUG(std::cerr << " dead");
+            end = getDefIndex(start) + 1;
             goto exit;
         }
     }
 
-    // a variable can only be killed by subsequent instructions
+    // If it is not dead on definition, it must be killed by a
+    // subsequent instruction. Hence its interval is:
+    // [defSlot(def), useSlot(kill)+1)
     do {
         ++mi;
-        ++end;
-        for (KillIter ki = lv_->killed_begin(*mi), ke = lv_->killed_end(*mi);
+        baseIndex += InstrSlots::NUM;
+        for (KillIter ki = lv_->killed_begin(mi), ke = lv_->killed_end(mi);
              ki != ke; ++ki) {
-            if (reg == ki->second) {
-                DEBUG(std::cerr << " killed\n");
+            if (interval.reg == ki->second) {
+                DEBUG(std::cerr << " killed");
+                end = getUseIndex(baseIndex) + 1;
                 goto exit;
             }
         }
@@ -240,17 +410,8 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock* mbb,
 
 exit:
     assert(start < end && "did not find end of interval?");
-
-    Reg2IntervalMap::iterator r2iit = r2iMap_.lower_bound(reg);
-    if (r2iit != r2iMap_.end() && r2iit->first == reg) {
-        r2iit->second->addRange(start, end);
-    }
-    else {
-        intervals_.push_back(Interval(reg));
-        // update interval index for this register
-        r2iMap_.insert(r2iit, std::make_pair(reg, --intervals_.end()));
-        intervals_.back().addRange(start, end);
-    }
+    interval.addRange(start, end);
+    DEBUG(std::cerr << '\n');
 }
 
 void LiveIntervals::handleRegisterDef(MachineBasicBlock* mbb,
@@ -259,21 +420,29 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock* mbb,
 {
     if (MRegisterInfo::isPhysicalRegister(reg)) {
         if (lv_->getAllocatablePhysicalRegisters()[reg]) {
-            handlePhysicalRegisterDef(mbb, mi, reg);
+            handlePhysicalRegisterDef(mbb, mi, getOrCreateInterval(reg));
             for (const unsigned* as = mri_->getAliasSet(reg); *as; ++as)
-                handlePhysicalRegisterDef(mbb, mi, *as);
+                handlePhysicalRegisterDef(mbb, mi, getOrCreateInterval(*as));
         }
     }
-    else {
-        handleVirtualRegisterDef(mbb, mi, reg);
-    }
+    else
+        handleVirtualRegisterDef(mbb, mi, getOrCreateInterval(reg));
 }
 
 unsigned LiveIntervals::getInstructionIndex(MachineInstr* instr) const
 {
-    assert(mi2iMap_.find(instr) != mi2iMap_.end() &&
-           "instruction not assigned a number");
-    return mi2iMap_.find(instr)->second;
+    Mi2IndexMap::const_iterator it = mi2iMap_.find(instr);
+    return (it == mi2iMap_.end() ?
+            std::numeric_limits<unsigned>::max() :
+            it->second);
+}
+
+MachineInstr* LiveIntervals::getInstructionFromIndex(unsigned index) const
+{
+    index /= InstrSlots::NUM; // convert index to vector index
+    assert(index < i2miMap_.size() &&
+           "index does not correspond to an instruction");
+    return i2miMap_[index];
 }
 
 /// computeIntervals - computes the live intervals for virtual
@@ -282,33 +451,32 @@ unsigned LiveIntervals::getInstructionIndex(MachineInstr* instr) const
 /// which a variable is live
 void LiveIntervals::computeIntervals()
 {
-    DEBUG(std::cerr << "computing live intervals:\n");
+    DEBUG(std::cerr << "********** COMPUTING LIVE INTERVALS **********\n");
+    DEBUG(std::cerr << "********** Function: "
+          << ((Value*)mf_->getFunction())->getName() << '\n');
 
-    for (MbbIndex2MbbMap::iterator
-             it = mbbi2mbbMap_.begin(), itEnd = mbbi2mbbMap_.end();
-         it != itEnd; ++it) {
-        MachineBasicBlock* mbb = it->second;
-        DEBUG(std::cerr << "machine basic block: "
-              << mbb->getBasicBlock()->getName() << "\n");
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); 
+         I != E; ++I) {
+        MachineBasicBlock* mbb = I;
+        DEBUG(std::cerr << ((Value*)mbb->getBasicBlock())->getName() << ":\n");
 
         for (MachineBasicBlock::iterator mi = mbb->begin(), miEnd = mbb->end();
              mi != miEnd; ++mi) {
-            MachineInstr* instr = *mi;
             const TargetInstrDescriptor& tid =
-                tm_->getInstrInfo().get(instr->getOpcode());
-            DEBUG(std::cerr << "\t[" << getInstructionIndex(instr) << "] ";
-                  instr->print(std::cerr, *tm_););
+                tm_->getInstrInfo()->get(mi->getOpcode());
+            DEBUG(std::cerr << getInstructionIndex(mi) << "\t";
+                  mi->print(std::cerr, tm_));
 
             // handle implicit defs
             for (const unsigned* id = tid.ImplicitDefs; *id; ++id)
                 handleRegisterDef(mbb, mi, *id);
 
             // handle explicit defs
-            for (int i = instr->getNumOperands() - 1; i >= 0; --i) {
-                MachineOperand& mop = instr->getOperand(i);
+            for (int i = mi->getNumOperands() - 1; i >= 0; --i) {
+                MachineOperand& mop = mi->getOperand(i);
                 // handle register defs - build intervals
-                if (mop.isRegister() && mop.isDef())
-                    handleRegisterDef(mbb, mi, mop.getAllocatedRegNum());
+                if (mop.isRegister() && mop.getReg() && mop.isDef())
+                    handleRegisterDef(mbb, mi, mop.getReg());
             }
         }
     }
@@ -322,99 +490,152 @@ unsigned LiveIntervals::rep(unsigned reg)
     return reg;
 }
 
-void LiveIntervals::joinIntervals()
-{
-    DEBUG(std::cerr << "joining compatible intervals:\n");
+void LiveIntervals::joinIntervalsInMachineBB(MachineBasicBlock *MBB) {
+    DEBUG(std::cerr << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
+    const TargetInstrInfo& tii = *tm_->getInstrInfo();
+
+    for (MachineBasicBlock::iterator mi = MBB->begin(), mie = MBB->end();
+         mi != mie; ++mi) {
+        const TargetInstrDescriptor& tid = tii.get(mi->getOpcode());
+        DEBUG(std::cerr << getInstructionIndex(mi) << '\t';
+              mi->print(std::cerr, tm_););
+
+        // we only join virtual registers with allocatable
+        // physical registers since we do not have liveness information
+        // on non-allocatable physical registers
+        unsigned regA, regB;
+        if (tii.isMoveInstr(*mi, regA, regB) &&
+            (MRegisterInfo::isVirtualRegister(regA) ||
+             lv_->getAllocatablePhysicalRegisters()[regA]) &&
+            (MRegisterInfo::isVirtualRegister(regB) ||
+             lv_->getAllocatablePhysicalRegisters()[regB])) {
+
+            // get representative registers
+            regA = rep(regA);
+            regB = rep(regB);
+
+            // if they are already joined we continue
+            if (regA == regB)
+                continue;
+
+            Reg2IntervalMap::iterator r2iA = r2iMap_.find(regA);
+            assert(r2iA != r2iMap_.end() &&
+                   "Found unknown vreg in 'isMoveInstr' instruction");
+            Reg2IntervalMap::iterator r2iB = r2iMap_.find(regB);
+            assert(r2iB != r2iMap_.end() &&
+                   "Found unknown vreg in 'isMoveInstr' instruction");
+
+            Intervals::iterator intA = r2iA->second;
+            Intervals::iterator intB = r2iB->second;
+
+            DEBUG(std::cerr << "\t\tInspecting " << *intA << " and " << *intB 
+                            << ": ");
+
+            // both A and B are virtual registers
+            if (MRegisterInfo::isVirtualRegister(intA->reg) &&
+                MRegisterInfo::isVirtualRegister(intB->reg)) {
+
+                const TargetRegisterClass *rcA, *rcB;
+                rcA = mf_->getSSARegMap()->getRegClass(intA->reg);
+                rcB = mf_->getSSARegMap()->getRegClass(intB->reg);
+
+                // if they are not of the same register class we continue
+                if (rcA != rcB) {
+                    DEBUG(std::cerr << "Differing reg classes.\n");
+                    continue;
+                }
 
-    const TargetInstrInfo& tii = tm_->getInstrInfo();
+                // join them if both are defined once or they do not overlap
+                if ((intA->isDefinedOnce && intB->isDefinedOnce) ||
+                    !intB->overlaps(*intA)) {
+                    intA->join(*intB);
+                    DEBUG(std::cerr << "Joined.  Result = " << *intA << "\n");
+                    r2iB->second = r2iA->second;
+                    r2rMap_.insert(std::make_pair(intB->reg, intA->reg));
+                    intervals_.erase(intB);
+                } else {
+                    DEBUG(std::cerr << "Interference!\n");
+                }
+            } else if (!MRegisterInfo::isPhysicalRegister(intA->reg) ||
+                       !MRegisterInfo::isPhysicalRegister(intB->reg)) {
+                if (MRegisterInfo::isPhysicalRegister(intB->reg)) {
+                    std::swap(regA, regB);
+                    std::swap(intA, intB);
+                    std::swap(r2iA, r2iB);
+                }
 
-    for (MachineFunction::const_iterator mbbi = mf_->begin(),
-             mbbe = mf_->end(); mbbi != mbbe; ++mbbi) {
-        const MachineBasicBlock* mbb = mbbi;
-        DEBUG(std::cerr << "machine basic block: "
-              << mbb->getBasicBlock()->getName() << "\n");
+                assert(MRegisterInfo::isPhysicalRegister(intA->reg) &&
+                       MRegisterInfo::isVirtualRegister(intB->reg) &&
+                       "A must be physical and B must be virtual");
 
-        for (MachineBasicBlock::const_iterator mii = mbb->begin(),
-                 mie = mbb->end(); mii != mie; ++mii) {
-            MachineInstr* mi = *mii;
-            const TargetInstrDescriptor& tid =
-                tm_->getInstrInfo().get(mi->getOpcode());
-            DEBUG(std::cerr << "\t\tinstruction["
-                  << getInstructionIndex(mi) << "]: ";
-                  mi->print(std::cerr, *tm_););
-
-            // we only join virtual registers with allocatable
-            // physical registers since we do not have liveness information
-            // on not allocatable physical registers
-            unsigned regA, regB;
-            if (tii.isMoveInstr(*mi, regA, regB) &&
-                (MRegisterInfo::isVirtualRegister(regA) ||
-                 lv_->getAllocatablePhysicalRegisters()[regA]) &&
-                (MRegisterInfo::isVirtualRegister(regB) ||
-                 lv_->getAllocatablePhysicalRegisters()[regB])) {
-
-                // get representative registers
-                regA = rep(regA);
-                regB = rep(regB);
-
-                // if they are already joined we continue
-                if (regA == regB)
+                const TargetRegisterClass *rcA, *rcB;
+                rcA = mri_->getRegClass(intA->reg);
+                rcB = mf_->getSSARegMap()->getRegClass(intB->reg);
+                // if they are not of the same register class we continue
+                if (rcA != rcB) {
+                    DEBUG(std::cerr << "Differing reg classes.\n");
                     continue;
-
-                Reg2IntervalMap::iterator r2iA = r2iMap_.find(regA);
-                assert(r2iA != r2iMap_.end());
-                Reg2IntervalMap::iterator r2iB = r2iMap_.find(regB);
-                assert(r2iB != r2iMap_.end());
-
-                Intervals::iterator intA = r2iA->second;
-                Intervals::iterator intB = r2iB->second;
-
-                // both A and B are virtual registers
-                if (MRegisterInfo::isVirtualRegister(intA->reg) &&
-                    MRegisterInfo::isVirtualRegister(intB->reg)) {
-
-                    const TargetRegisterClass *rcA, *rcB;
-                    rcA = mf_->getSSARegMap()->getRegClass(intA->reg);
-                    rcB = mf_->getSSARegMap()->getRegClass(intB->reg);
-                    assert(rcA == rcB && "registers must be of the same class");
-
-                    // if their intervals do not overlap we join them
-                    if (!intB->overlaps(*intA)) {
-                        intA->join(*intB);
-                        r2iB->second = r2iA->second;
-                        r2rMap_.insert(std::make_pair(intB->reg, intA->reg));
-                        intervals_.erase(intB);
-                        ++numJoined;
-                    }
                 }
-                else if (MRegisterInfo::isPhysicalRegister(intA->reg) ^
-                         MRegisterInfo::isPhysicalRegister(intB->reg)) {
-                    if (MRegisterInfo::isPhysicalRegister(intB->reg)) {
-                        std::swap(regA, regB);
-                        std::swap(intA, intB);
-                        std::swap(r2iA, r2iB);
-                    }
 
-                    assert(MRegisterInfo::isPhysicalRegister(intA->reg) &&
-                           MRegisterInfo::isVirtualRegister(intB->reg) &&
-                           "A must be physical and B must be virtual");
-
-                    if (!intA->overlaps(*intB) &&
-                         !overlapsAliases(*intA, *intB)) {
-                        intA->join(*intB);
-                        r2iB->second = r2iA->second;
-                        r2rMap_.insert(std::make_pair(intB->reg, intA->reg));
-                        intervals_.erase(intB);
-                        ++numJoined;
-                    }
+                if (!intA->overlaps(*intB) &&
+                    !overlapsAliases(*intA, *intB)) {
+                    intA->join(*intB);
+                    DEBUG(std::cerr << "Joined.  Result = " << *intA << "\n");
+                    r2iB->second = r2iA->second;
+                    r2rMap_.insert(std::make_pair(intB->reg, intA->reg));
+                    intervals_.erase(intB);
+                } else {
+                    DEBUG(std::cerr << "Interference!\n");
                 }
+            } else {
+                DEBUG(std::cerr << "Cannot join physregs.\n");
             }
         }
     }
 }
 
-bool LiveIntervals::overlapsAliases(const Interval& lhs,
-                                    const Interval& rhs) const
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first on the loop depth
+  // of the basic block (the unsigned, deepest first), then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      if (LHS.first > RHS.first) return true;   // Deeper loops first
+      return LHS.first == RHS.first && 
+             LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+void LiveIntervals::joinIntervals() {
+  DEBUG(std::cerr << "********** JOINING INTERVALS ***********\n");
+
+  const LoopInfo &LI = getAnalysis<LoopInfo>();
+  if (LI.begin() == LI.end()) {
+    // If there are no loops in the function, join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      joinIntervalsInMachineBB(I);
+  } else {
+    // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over loop hierarchy here because
+    // there may be more MBB's than BB's.  Collect MBB's for sorting.
+    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      MBBs.push_back(std::make_pair(LI.getLoopDepth(I->getBasicBlock()), I));
+
+    // Sort by loop depth.
+    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+    // Finally, join intervals in loop nest order. 
+    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+      joinIntervalsInMachineBB(MBBs[i].second);
+  }
+}
+
+bool LiveIntervals::overlapsAliases(const LiveInterval& lhs,
+                                    const LiveInterval& rhs) const
 {
     assert(MRegisterInfo::isPhysicalRegister(lhs.reg) &&
            "first interval must describe a physical register");
@@ -429,24 +650,37 @@ bool LiveIntervals::overlapsAliases(const Interval& lhs,
     return false;
 }
 
-LiveIntervals::Interval::Interval(unsigned r)
-    : reg(r),
-      weight((MRegisterInfo::isPhysicalRegister(r) ?
-              std::numeric_limits<float>::max() : 0.0F))
+LiveInterval& LiveIntervals::getOrCreateInterval(unsigned reg)
 {
+    Reg2IntervalMap::iterator r2iit = r2iMap_.lower_bound(reg);
+    if (r2iit == r2iMap_.end() || r2iit->first != reg) {
+        intervals_.push_back(LiveInterval(reg));
+        r2iit = r2iMap_.insert(r2iit, std::make_pair(reg, --intervals_.end()));
+    }
+
+    return *r2iit->second;
+}
 
+LiveInterval::LiveInterval(unsigned r)
+    : reg(r),
+      weight((MRegisterInfo::isPhysicalRegister(r) ?  HUGE_VAL : 0.0F)),
+      isDefinedOnce(false) {
+}
+
+bool LiveInterval::spilled() const
+{
+    return (weight == HUGE_VAL &&
+            MRegisterInfo::isVirtualRegister(reg));
 }
 
-// This example is provided becaues liveAt() is non-obvious:
+// An example for liveAt():
 //
-// this = [1,2), liveAt(1) will return false. The idea is that the
-// variable is defined in 1 and not live after definition. So it was
-// dead to begin with (defined but never used).
+// this = [1,4), liveAt(0) will return false. The instruction defining
+// this spans slots [0,3]. The interval belongs to a spilled
+// definition of the variable it represents: it starts at slot 1
+// (the def slot) and extends through slot 3 (the store slot).
 //
-// this = [1,3), liveAt(2) will return false. The variable is used at
-// 2 but 2 is the last use so the variable's allocated register is
-// available for reuse.
-bool LiveIntervals::Interval::liveAt(unsigned index) const
+bool LiveInterval::liveAt(unsigned index) const
 {
     Range dummy(index, index+1);
     Ranges::const_iterator r = std::upper_bound(ranges.begin(),
@@ -456,24 +690,24 @@ bool LiveIntervals::Interval::liveAt(unsigned index) const
         return false;
 
     --r;
-    return index >= r->first && index < (r->second - 1);
+    return index >= r->first && index < r->second;
 }
 
-// This example is provided because overlaps() is non-obvious:
+// An example for overlaps():
 //
 // 0: A = ...
-// 1: B = ...
-// 2: C = A + B ;; last use of A
+// 4: B = ...
+// 8: C = A + B ;; last use of A
 //
 // The live intervals should look like:
 //
-// A = [0, 3)
-// B = [1, x)
-// C = [2, y)
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
 //
 // A->overlaps(C) should return false since we want to be able to join
 // A and C.
-bool LiveIntervals::Interval::overlaps(const Interval& other) const
+bool LiveInterval::overlaps(const LiveInterval& other) const
 {
     Ranges::const_iterator i = ranges.begin();
     Ranges::const_iterator ie = ranges.end();
@@ -499,7 +733,7 @@ bool LiveIntervals::Interval::overlaps(const Interval& other) const
             }
             assert(i->first < j->first);
 
-            if ((i->second - 1) > j->first) {
+            if (i->second > j->first) {
                 return true;
             }
             else {
@@ -511,10 +745,10 @@ bool LiveIntervals::Interval::overlaps(const Interval& other) const
     return false;
 }
 
-void LiveIntervals::Interval::addRange(unsigned start, unsigned end)
+void LiveInterval::addRange(unsigned start, unsigned end)
 {
     assert(start < end && "Invalid range to add!");
-    DEBUG(std::cerr << "\t\t\tadding range: [" << start <<','<< end << ") -> ");
+    DEBUG(std::cerr << " +[" << start << ',' << end << ")");
     //assert(start < end && "invalid range?");
     Range range = std::make_pair(start, end);
     Ranges::iterator it =
@@ -523,14 +757,12 @@ void LiveIntervals::Interval::addRange(unsigned start, unsigned end)
 
     it = mergeRangesForward(it);
     it = mergeRangesBackward(it);
-    DEBUG(std::cerr << "\t\t\t\tafter merging: " << *this << '\n');
 }
 
-void LiveIntervals::Interval::join(const LiveIntervals::Interval& other)
+void LiveInterval::join(const LiveInterval& other)
 {
-    DEBUG(std::cerr << "\t\t\t\tjoining intervals: "
-          << other << " and " << *this << '\n');
     Ranges::iterator cur = ranges.begin();
+    isDefinedOnce &= other.isDefinedOnce;
 
     for (Ranges::const_iterator i = other.ranges.begin(),
              e = other.ranges.end(); i != e; ++i) {
@@ -538,43 +770,47 @@ void LiveIntervals::Interval::join(const LiveIntervals::Interval& other)
         cur = mergeRangesForward(cur);
         cur = mergeRangesBackward(cur);
     }
-    if (MRegisterInfo::isVirtualRegister(reg))
-        weight += other.weight;
-
-    DEBUG(std::cerr << "\t\t\t\tafter merging: " << *this << '\n');
+    weight += other.weight;
+    ++numJoins;
 }
 
-LiveIntervals::Interval::Ranges::iterator
-LiveIntervals::Interval::mergeRangesForward(Ranges::iterator it)
+LiveInterval::Ranges::iterator LiveInterval::
+mergeRangesForward(Ranges::iterator it)
 {
-    for (Ranges::iterator next = it + 1;
-         next != ranges.end() && it->second >= next->first; ) {
-        it->second = std::max(it->second, next->second);
-        next = ranges.erase(next);
+    Ranges::iterator n;
+    while ((n = next(it)) != ranges.end()) {
+        if (n->first > it->second)
+            break;
+        it->second = std::max(it->second, n->second);
+        n = ranges.erase(n);
     }
     return it;
 }
 
-LiveIntervals::Interval::Ranges::iterator
-LiveIntervals::Interval::mergeRangesBackward(Ranges::iterator it)
+LiveInterval::Ranges::iterator LiveInterval::
+mergeRangesBackward(Ranges::iterator it)
 {
     while (it != ranges.begin()) {
-        Ranges::iterator prev = it - 1;
-        if (it->first > prev->second) break;
+        Ranges::iterator p = prior(it);
+        if (it->first > p->second)
+            break;
 
-        it->first = std::min(it->first, prev->first);
-        it->second = std::max(it->second, prev->second);
-        it = ranges.erase(prev);
+        it->first = std::min(it->first, p->first);
+        it->second = std::max(it->second, p->second);
+        it = ranges.erase(p);
     }
 
     return it;
 }
 
-std::ostream& llvm::operator<<(std::ostream& os,
-                               const LiveIntervals::Interval& li)
+std::ostream& llvm::operator<<(std::ostream& os, const LiveInterval& li)
 {
-    os << "%reg" << li.reg << ',' << li.weight << " = ";
-    for (LiveIntervals::Interval::Ranges::const_iterator
+    os << "%reg" << li.reg << ',' << li.weight;
+    if (li.empty())
+        return os << "EMPTY";
+
+    os << " = ";
+    for (LiveInterval::Ranges::const_iterator
              i = li.ranges.begin(), e = li.ranges.end(); i != e; ++i) {
         os << "[" << i->first << "," << i->second << ")";
     }