From: Jakob Stoklund Olesen Date: Wed, 9 Nov 2011 01:06:56 +0000 (+0000) Subject: Collapse DomainValues across loop back-edges. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=f4c4768fb2277cb940a90cb2f0e9a747ebc671c3 Collapse DomainValues across loop back-edges. During the initial RPO traversal of the basic blocks, remember the ones that are incomplete because of back-edges from predecessors that haven't been visited yet. After the initial RPO, revisit all those loop headers so the incoming DomainValues on the back-edges can be properly collapsed. This will properly fix execution domains on software pipelined code, like the included test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144151 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index c25f7db26c1..fc0b6124641 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -160,7 +160,7 @@ private: void collapse(DomainValue *dv, unsigned domain); bool merge(DomainValue *A, DomainValue *B); - void enterBasicBlock(MachineBasicBlock*); + bool enterBasicBlock(MachineBasicBlock*); void leaveBasicBlock(MachineBasicBlock*); void visitInstr(MachineInstr*); void visitGenericInstr(MachineInstr*); @@ -317,7 +317,13 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { +// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +// Return true if some predecessor hasn't been processed yet (like on a loop +// back-edge). +bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + bool seenBackEdge = false; + // Try to coalesce live-out registers from predecessors. 
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { @@ -326,7 +332,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), pe = MBB->pred_end(); pi != pe; ++pi) { LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; + if (fi == LiveOuts.end()) { + seenBackEdge = true; + continue; + } + if (!fi->second) + continue; DomainValue *pdv = resolve(fi->second[rx]); if (!pdv) continue; if (!LiveRegs || !LiveRegs[rx]) { @@ -350,12 +361,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { force(rx, pdv->getFirstDomain()); } } + return seenBackEdge; } void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); + // Also use LiveOuts as a visited set to detect back-edges. + if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i]); + delete[] LiveRegs; + } LiveRegs = 0; } @@ -545,23 +563,32 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *Entry = MF->begin(); ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); + SmallVector<MachineBasicBlock*, 16> Loops; for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; - enterBasicBlock(MBB); + if (enterBasicBlock(MBB)) + Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) visitInstr(I); leaveBasicBlock(MBB); } + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. 
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + leaveBasicBlock(MBB); + } + // Clear the LiveOuts vectors and collapse any remaining DomainValues. for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); - if (FI == LiveOuts.end()) + if (FI == LiveOuts.end() || !FI->second) continue; - assert(FI->second && "Null entry"); for (unsigned i = 0, e = NumRegs; i != e; ++i) if (FI->second[i]) release(FI->second[i]); diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll new file mode 100644 index 00000000000..d26d32287e0 --- /dev/null +++ b/test/CodeGen/X86/sse-domains.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7" + +; CHECK: f +; +; This function contains load / store / and operations that all can execute in +; any domain. The only domain-specific operation is the %add = shl... operation +; which is <4 x i32>. +; +; The paddd instruction can only influence the other operations through the loop +; back-edge. Check that everything is still moved into the integer domain. + +define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp { +entry: + br label %while.body + +; Materialize a zeroinitializer and a constant-pool load in the integer domain. +; The order is not important. +; CHECK: pxor +; CHECK: movdqa + +; The instructions in the loop must all be integer domain as well. +; CHECK: while.body +; CHECK: pand +; CHECK: movdqa +; CHECK: movdqa +; Finally, the controlling integer-only instruction. 
+; CHECK: paddd +while.body: + %p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ] + %n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ] + %dec = add nsw i32 %n.addr.03, -1 + %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127> + %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1 + store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16 + %0 = load <4 x i32>* %incdec.ptr, align 16 + %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1> + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: + ret void +}