From 9ac743a4ee61cb845bbe22a2f6898f38c2adafce Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Mon, 10 Oct 2011 19:09:20 +0000 Subject: [PATCH] Add dominance check for the instruction being hoisted. For example, MachineLICM should not hoist a load that is not guaranteed to be executed. Radar 10254254. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineLICM.cpp | 30 +++++++++++++ test/CodeGen/ARM/lsr-unfolded-offset.ll | 3 +- test/CodeGen/X86/licm-dominance.ll | 56 +++++++++++++++++++++++++ test/CodeGen/X86/licm-nested.ll | 2 +- 4 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/X86/licm-dominance.ll diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index d310f252e28..f8cbe41b944 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -168,6 +168,11 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// IsGuaranteedToExecute - check to make sure that the MI dominates + /// all of the exiting blocks. If it doesn't, then there is a path out of + /// the loop which does not execute this instruction, so we can't hoist it. + bool IsGuaranteedToExecute(MachineInstr *MI); + /// HasAnyPHIUse - Return true if the specified register is used by any /// phi node. bool HasAnyPHIUse(unsigned Reg) const; @@ -1129,6 +1134,29 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } +/// IsGuaranteedToExecute - check to make sure that the instruction dominates +/// all of the exiting blocks. If it doesn't, then there is a path out of the +/// loop which does not execute this instruction, so we can't hoist it. +bool MachineLICM::IsGuaranteedToExecute(MachineInstr *MI) { + // If the instruction is in the header block for the loop (which is very + // common), it is always guaranteed to dominate the exiting blocks. Since + // this is a common case, and can save some work, check it now. 
+ if (MI->getParent() == CurLoop->getHeader()) + return true; + + // Get the exiting blocks for the current loop. + SmallVector<MachineBasicBlock*, 8> ExitingBlocks; + CurLoop->getExitingBlocks(ExitingBlocks); + + // Verify that the block dominates each of the exiting blocks of the loop. + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) + if (ExitingBlocks[i] != CurLoop->getHeader() && + !DT->dominates(MI->getParent(), ExitingBlocks[i])) + return false; + + return true; +} + /// Hoist - When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// @@ -1139,6 +1167,8 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { MI = ExtractHoistableLoad(MI); if (!MI) return false; } + if (!IsGuaranteedToExecute(MI)) + return false; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 61b25bb94af..bf26a9670a7 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -4,12 +4,11 @@ ; register pressure and therefore spilling. There is more room for improvement ; here. -; CHECK: sub sp, #{{32|28|24}} +; CHECK: sub sp, #{{40|32|28|24}} ; CHECK: %for.inc ; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # ; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # -; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, # ; CHECK: add target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll new file mode 100644 index 00000000000..cf3f2e87b74 --- /dev/null +++ b/test/CodeGen/X86/licm-dominance.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s + +; MachineLICM should check dominance before hoisting instructions. 
+; CHECK: xorb %cl, %cl +; CHECK-NEXT: testb %cl, %cl + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.2" + +define void @CMSColorWorldCreateParametricData() nounwind uwtable optsize ssp { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.inc.i, %entry + br i1 undef, label %for.inc.i, label %land.lhs.true21.i + +land.lhs.true21.i: ; preds = %for.body.i + br i1 undef, label %if.then26.i, label %for.inc.i + +if.then26.i: ; preds = %land.lhs.true21.i + br i1 undef, label %if.else.i.i, label %if.then.i.i + +if.then.i.i: ; preds = %if.then26.i + unreachable + +if.else.i.i: ; preds = %if.then26.i + br i1 undef, label %lor.lhs.false.i.i, label %if.then116.i.i + +lor.lhs.false.i.i: ; preds = %if.else.i.i + br i1 undef, label %lor.lhs.false104.i.i, label %if.then116.i.i + +lor.lhs.false104.i.i: ; preds = %lor.lhs.false.i.i + br i1 undef, label %lor.lhs.false108.i.i, label %if.then116.i.i + +lor.lhs.false108.i.i: ; preds = %lor.lhs.false104.i.i + br i1 undef, label %lor.lhs.false112.i.i, label %if.then116.i.i + +lor.lhs.false112.i.i: ; preds = %lor.lhs.false108.i.i + br i1 undef, label %if.else232.i.i, label %if.then116.i.i + +if.then116.i.i: ; preds = %lor.lhs.false112.i.i, %lor.lhs.false108.i.i, %lor.lhs.false104.i.i, %lor.lhs.false.i.i, %if.else.i.i + unreachable + +if.else232.i.i: ; preds = %lor.lhs.false112.i.i + br label %for.inc.i + +for.inc.i: ; preds = %if.else232.i.i, %land.lhs.true21.i, %for.body.i + %cmp17.i = icmp ult i64 undef, undef + br i1 %cmp17.i, label %for.body.i, label %if.end28.i + +if.end28.i: ; preds = %for.inc.i + unreachable + +createTransformParams.exit: ; No predecessors! + ret void +} diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index b0105ac533b..901d987e8d4 100644 --- 
a/test/CodeGen/X86/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3 +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 2 ; MachineLICM should be able to hoist the symbolic addresses out of ; the inner loops. -- 2.34.1