From d40d4c34f72d1eda3cd9ba0f3dbf2d43b726f06c Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 19 Sep 2012 21:29:18 +0000 Subject: [PATCH] Resolve conflicts involving dead vector lanes for -new-coalescer. A common coalescing conflict in vector code is lane insertion: %dst = FOO %src = BAR %dst:ssub0 = COPY %src The live range of %src interferes with the ssub0 lane of %dst, but that lane is never read after %src would have clobbered it. That makes it safe to merge the live ranges and eliminate the COPY: %dst = FOO %dst:ssub0 = BAR This patch teaches the new coalescer to resolve conflicts where dead vector lanes would be clobbered, at least as long as the clobbered vector lanes don't escape the basic block. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegisterCoalescer.cpp | 164 +++++++++++++++++++++++++-- test/CodeGen/ARM/coalesce-subregs.ll | 27 +++++ 2 files changed, 182 insertions(+), 9 deletions(-) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 67cad14afd3..b372b86cfa5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(NumInflated , "Number of register classes inflated"); +STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); +STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); static cl::opt EnableJoining("join-liveintervals", @@ -1257,6 +1259,9 @@ class JoinVals { VNInfo *stripCopies(VNInfo *VNI); ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); void computeAssignment(unsigned ValNo, JoinVals &Other); + bool taintExtent(unsigned, unsigned, JoinVals&, + SmallVectorImpl >&); + bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); public: JoinVals(LiveInterval &li, unsigned subIdx, @@ -1409,8 +1414,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Keep; // Both sides have been analyzed now. Do they conflict? if (V.ValidLanes & OtherV.ValidLanes) - // Overlapping lanes can't be resolved now, maybe later. - return CR_Unresolved; + // Overlapping lanes can't be resolved. + return CR_Impossible; else return CR_Merge; } @@ -1474,9 +1479,28 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { if ((V.WriteLanes & OtherV.ValidLanes) == 0) return CR_Replace; - // FIXME: Identify CR_Replace opportunities where the clobbered lanes are - // dead. - return CR_Impossible; + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.LI wouldn't be live here. + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; } /// Compute the value assignment for ValNo in LI. @@ -1523,15 +1547,137 @@ bool JoinVals::mapValues(JoinVals &Other) { return true; } +/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// the extent of the tainted lanes in the block. +/// +/// Multiple values in Other.LI can be affected since partial redefinitions can +/// preserve previously tainted lanes. +/// +/// 1 %dst = VLOAD <-- Define all lanes in %dst +/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 +/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 +/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read +/// +/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) +/// entry to TaintedVals. +/// +/// Returns false if the tainted lanes extend beyond the basic block. +bool JoinVals:: +taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, + SmallVectorImpl > &TaintExtent) { + VNInfo *VNI = LI.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LI from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LI.find(VNI->def); + assert(OtherI != Other.LI.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. + if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. + const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} + +/// Return true if MI uses any of the given Lanes from Reg. +/// This does not include partial redefinitions of Reg. +bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) { + if (MI->isDebugValue()) + return false; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) + continue; + if (!MO->readsReg()) + continue; + if (Lanes & + TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg()))) + return true; + } + return false; +} + bool JoinVals::resolveConflicts(JoinVals &Other) { for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { - assert (Vals[i].Resolution != CR_Impossible && "Unresolvable conflict"); - if (Vals[i].Resolution != CR_Unresolved) + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) continue; - // FIXME: Actually resolve dead lane conflicts. DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i << '@' << LI.getValNumInfo(i)->def << '\n'); - return false; + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LI.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; + + assert(!TaintExtent.empty() && "There should be at least one conflict."); + + // Now look at the instructions from VNI->def to TaintExtent (inclusive). + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; + } + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); + return false; + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; + } + + // The tainted lanes are unused. + V.Resolution = CR_Replace; + ++NumLaneResolves; } return true; } diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll index dfb5b17306a..e5a88be64d2 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -114,3 +114,30 @@ if.end: ; preds = %if.else, %if.then } declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly + +; CHECK: f4 +; This function inserts a lane into a fully defined vector. +; The destination lane isn't read, so the subregs can coalesce. +; CHECK-NOT: vmov +; CHECK-NOT: vorr +define void @f4(float* %p, float* %q) nounwind ssp { +entry: + %0 = bitcast float* %p to i8* + %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %0, i32 4) + %tobool = icmp eq float* %q, null + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %1 = load float* %q, align 4 + %arrayidx1 = getelementptr inbounds float* %q, i32 1 + %2 = load float* %arrayidx1, align 4 + %add = fadd float %1, %2 + %vecins = insertelement <2 x float> %vld1, float %add, i32 1 + br label %if.end + +if.end: ; preds = %entry, %if.then + %x.0 = phi <2 x float> [ %vecins, %if.then ], [ %vld1, %entry ] + tail call void @llvm.arm.neon.vst1.v2f32(i8* %0, <2 x float> %x.0, i32 4) + ret void +} -- 2.34.1