From 5211134fbd35bf0befc60888860010b23c27ee5a Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Wed, 14 Dec 2011 23:20:38 +0000 Subject: [PATCH] Do not sink instruction, if it is not profitable. On ARM, peephole optimization for ABS creates a trivial cfg triangle which tempts machine sink to sink instructions in code which is really straight line code. Sometimes this sinking may alter register allocator input such that use and def of a reg is divided by a branch in between, which may result in extra spills. Now mahine sink avoids sinking if final sink destination is post dominator. Radar 10266272. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineSink.cpp | 89 ++++++++++++++++++--- test/CodeGen/ARM/2011-12-14-machine-sink.ll | 48 +++++++++++ 2 files changed, 124 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/ARM/2011-12-14-machine-sink.ll diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 3837f6d888d..e47360dbba5 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -90,7 +90,11 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; - MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, bool &BreakPHIEdge); + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); @@ -399,18 +403,76 @@ static void collectDebugValues(MachineInstr *MI, } } +/// isPostDominatedBy - Return true if A is post dominated by B. +static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { + + // FIXME - Use real post dominator. + if (A->succ_size() != 2) + return false; + MachineBasicBlock::succ_iterator I = A->succ_begin(); + if (B == *I) + ++I; + MachineBasicBlock *OtherSuccBlock = *I; + if (OtherSuccBlock->succ_size() != 1 || + *(OtherSuccBlock->succ_begin()) != B) + return false; + + return true; +} + +/// isProfitableToSinkTo - Return true if it is profitable to sink MI. +bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo) { + assert (MI && "Invalid MachineInstr!"); + assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); + + if (MBB == SuccToSinkTo) + return false; + + // It is profitable if SuccToSinkTo does not post dominate current block. + if (!isPostDominatedBy(MBB, SuccToSinkTo)) + return true; + + // Check if only use in post dominated block is PHI instruction. + bool NonPHIUse = false; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + NonPHIUse = true; + } + if (!NonPHIUse) + return true; + + // If SuccToSinkTo post dominates then also it may be profitable if MI + // can further profitably sinked into another block in next round. + bool BreakPHIEdge = false; + // FIXME - If finding successor is compile time expensive then catch results. + if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) + return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); + + // If SuccToSinkTo is final destination and it is a post dominator of current + // block then it is not profitable to sink MI into SuccToSinkTo block. + return false; +} + /// FindSuccToSinkTo - Find a successor to sink this instruction to. MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, - bool &BreakPHIEdge) { + MachineBasicBlock *MBB, + bool &BreakPHIEdge) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. - MachineBasicBlock *ParentBlock = MI->getParent(); // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -469,7 +531,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) return NULL; @@ -478,11 +540,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), - E = ParentBlock->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBlock = *SI; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, SuccBlock, ParentBlock, + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { SuccToSinkTo = SuccBlock; break; @@ -495,12 +557,14 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; } } // It is not possible to sink an instruction into its own block. This can // happen with loops. - if (ParentBlock == SuccToSinkTo) + if (MBB == SuccToSinkTo) return NULL; // It's not safe to sink instructions to EH landing pad. Control flow into @@ -532,7 +596,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // and z and only shrink the live range of x. bool BreakPHIEdge = false; - MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, BreakPHIEdge); + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) @@ -553,8 +618,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); - MachineBasicBlock *ParentBlock = MI->getParent(); - // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll new file mode 100644 index 00000000000..5ce600d1a93 --- /dev/null +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; Radar 10266272 +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios4.0.0" +; STATS-NOT: machine-sink + +define i32 @foo(i32 %h) nounwind readonly ssp { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %cmp = icmp slt i32 0, %h + br i1 %cmp, label %for.body, label %if.end299 + +for.body: ; preds = %for.cond + %v.5 = select i1 undef, i32 undef, i32 0 + %0 = load i8* undef, align 1, !tbaa !0 + %conv88 = zext i8 %0 to i32 + %sub89 = sub nsw i32 0, %conv88 + %v.8 = select i1 undef, i32 undef, i32 %sub89 + %1 = load i8* null, align 1, !tbaa !0 + %conv108 = zext i8 %1 to i32 + %2 = load i8* undef, align 1, !tbaa !0 + %conv110 = zext i8 %2 to i32 + %sub111 = sub nsw i32 %conv108, %conv110 + %cmp112 = icmp slt i32 %sub111, 0 + %sub115 = sub nsw i32 0, %sub111 + %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111 + %add62 = add i32 0, %v.5 + %add73 = add i32 %add62, 0 + %add84 = add i32 %add73, 0 + %add95 = add i32 %add84, %v.8 + %add106 = add i32 %add95, 0 + %add117 = add i32 %add106, %v.10 + %add128 = add i32 %add117, 0 + %add139 = add i32 %add128, 0 + %add150 = add i32 %add139, 0 + %add161 = add i32 %add150, 0 + %add172 = add i32 %add161, 0 + br i1 undef, label %for.cond, label %if.end299 + +if.end299: ; preds = %for.body, %for.cond + %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ] + ret i32 %s.10 +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} -- 2.34.1