From c87c50a39c1bc27437352feee0f6aba2d50fa1b5 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 23 Jan 2011 22:04:55 +0000 Subject: [PATCH] Enhance SRoA to promote allocas that are used by selects in some common cases. This triggers a surprising number of times in SPEC2K6 because min/max idioms end up doing this. For example, code from the STL ends up looking like this to SRoA: %202 = load i64* %__old_size, align 8, !tbaa !3 %203 = load i64* %__old_size, align 8, !tbaa !3 %204 = load i64* %__n, align 8, !tbaa !3 %205 = icmp ult i64 %203, %204 %storemerge.i = select i1 %205, i64* %__n, i64* %__old_size %206 = load i64* %storemerge.i, align 8, !tbaa !3 We can now promote both the __n and the __old_size allocas. This addresses another chunk of rdar://7339113, poor codegen on stringswitch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124088 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/ScalarReplAggregates.cpp | 133 +++++++++++++++++- test/Transforms/ScalarRepl/phi-select.ll | 62 +++++++- 2 files changed, 190 insertions(+), 5 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 6b79695d6f3..60302f77947 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -31,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -43,12 +44,14 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); +STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); @@ -862,6 +865,134 @@ public: }; } // end anon namespace +/// isSafeSelectToSpeculate - Select instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers and then +/// select between the result, allowing the load of the alloca to be promoted. +/// From this: +/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// %V2 = load i32* %Other +/// %V = select i1 %cond, i32 %V1, i32* %V2 +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. +static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast(*UI); + if (LI == 0 || LI->isVolatile()) return false; + + // Both operands to the select need to be deferencable, either absolutely + // (e.g. allocas) or at this point because we can see other accesses to it. + if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, + LI->getAlignment(), TD)) + return false; + if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI, + LI->getAlignment(), TD)) + return false; + } + + return true; +} + + +/// tryToMakeAllocaBePromotable - This returns true if the alloca only has +/// direct (non-volatile) loads and stores to it. If the alloca is close but +/// not quite there, this will transform the code to allow promotion. As such, +/// it is a non-pure predicate. +static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { + SetVector, + SmallPtrSet > InstsToRewrite; + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast(U)) { + if (LI->isVolatile()) + return false; + continue; + } + + if (StoreInst *SI = dyn_cast(U)) { + if (SI->getOperand(0) == AI || SI->isVolatile()) + return false; // Don't allow a store OF the AI, only INTO the AI. + continue; + } + + if (SelectInst *SI = dyn_cast(U)) { + // If the condition being selected on is a constant, fold the select, yes + // this does (rarely) happen early on. + if (ConstantInt *CI = dyn_cast(SI->getCondition())) { + Value *Result = SI->getOperand(1+CI->isZero()); + SI->replaceAllUsesWith(Result); + SI->eraseFromParent(); + + // This is very rare and we just scrambled the use list of AI, start + // over completely. + return tryToMakeAllocaBePromotable(AI, TD); + } + + // If it is safe to turn "load (select c, AI, ptr)" into a select of two + // loads, then we can transform this by rewriting the select. + if (!isSafeSelectToSpeculate(SI, TD)) + return false; + + InstsToRewrite.insert(SI); + continue; + } + + return false; + } + + // If there are no instructions to rewrite, then all uses are load/stores and + // we're done! + if (InstsToRewrite.empty()) + return true; + + // If we have instructions that need to be rewritten for this to be promotable + // take care of it now. + for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + // Selects in InstsToRewrite only have load uses. Rewrite each as two + // loads with a new select. + SelectInst *SI = cast(InstsToRewrite[i]); + + for (Value::use_iterator UI = SI->use_begin(), E = SI->use_end(); UI != E;){ + LoadInst *LI = cast(*UI++); + + IRBuilder<> Builder(LI); + LoadInst *TrueLoad = + Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); + LoadInst *FalseLoad = + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + + // Transfer alignment and TBAA info if present. + TrueLoad->setAlignment(LI->getAlignment()); + FalseLoad->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + } + + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); + V->takeName(LI); + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } + + // Now that all the loads are gone, the select is gone too. + SI->eraseFromParent(); + } + + ++NumAdjusted; + return true; +} + + bool SROA::performPromotion(Function &F) { std::vector Allocas; DominatorTree *DT = 0; @@ -879,7 +1010,7 @@ bool SROA::performPromotion(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (tryToMakeAllocaBePromotable(AI, TD)) Allocas.push_back(AI); if (Allocas.empty()) break; diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll index 06ee2081513..f612ed55a2d 100644 --- a/test/Transforms/ScalarRepl/phi-select.ll +++ b/test/Transforms/ScalarRepl/phi-select.ll @@ -44,16 +44,15 @@ F: } ; CHECK: @test3 -; CHECK: %A.0 = alloca i32 -; CHECK: %A.1 = alloca i32 +; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2 +; CHECK-NEXT: ret i32 %Q ; rdar://8904039 define i32 @test3(i1 %c) { -entry: %A = alloca {i32, i32} %B = getelementptr {i32, i32}* %A, i32 0, i32 0 store i32 1, i32* %B %C = getelementptr {i32, i32}* %A, i32 0, i32 1 - store i32 2, i32* %B + store i32 2, i32* %C %X = select i1 %c, i32* %B, i32* %C %Q = load i32* %X @@ -76,3 +75,58 @@ entry: %Q = load i64* %Y ret i64 %Q } + + +;; +;; Tests for promoting allocas used by selects. +;; rdar://7339113 +;; + +define i32 @test5(i32 *%P) nounwind readnone ssp { +entry: + %b = alloca i32, align 8 + store i32 2, i32* %b, align 8 + + ;; Select on constant condition should be folded. + %p.0 = select i1 false, i32* %b, i32* %P + store i32 123, i32* %p.0 + + %r = load i32* %b, align 8 + ret i32 %r + +; CHECK: @test5 +; CHECK: store i32 123, i32* %P +; CHECK: ret i32 2 +} + +define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp { + %a = alloca i32, align 8 + %b = alloca i32, align 8 + store i32 1, i32* %a, align 8 + store i32 2, i32* %b, align 8 + %p.0 = select i1 %c, i32* %b, i32* %a + %r = load i32* %p.0, align 8 + ret i32 %r +; CHECK: @test6 +; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1 +; CHECK-NEXT: ret i32 %r +} + +; Verify that the loads happen where the loads are, not where the select is. +define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp { + %a = alloca i32, align 8 + %b = alloca i32, align 8 + store i32 1, i32* %a + store i32 2, i32* %b + %p.0 = select i1 %c, i32* %b, i32* %a + + store i32 0, i32* %a + + %r = load i32* %p.0, align 8 + ret i32 %r +; CHECK: @test7 +; CHECK-NOT: alloca i32 +; CHECK: %r = select i1 %c, i32 2, i32 0 +; CHECK: ret i32 %r +} + -- 2.34.1