From: Cameron Zwarich Date: Wed, 16 Mar 2011 00:13:44 +0000 (+0000) Subject: Only convert allocas to scalars if it is profitable. The profitability metric I X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=85b0f468cf8390fca3ec356cd498ce0039dbad4f;p=oota-llvm.git Only convert allocas to scalars if it is profitable. The profitability metric I chose is having a non-memcpy/memset use and being larger than any native integer type. Originally I chose having an access of a size smaller than the total size of the alloca, but this caused some minor issues on the spirit benchmark where SRoA runs again after some inlining. This fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127718 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index f70da11e9a5..3e7c640b9a8 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -238,10 +238,15 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the alloca that is + /// not a MemTransferInst. We don't want to turn structs into large integers + /// unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), - HadAVector(false) { } + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); @@ -280,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. 
} else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -431,6 +441,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; + HadNonMemTransferAccess = true; MergeInType(LI->getType(), Offset); continue; } @@ -441,6 +452,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; + HadNonMemTransferAccess = true; MergeInType(SI->getOperand(0)->getType(), Offset); continue; } @@ -465,6 +477,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -476,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. 
+ HadNonMemTransferAccess = true; continue; } diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll new file mode 100644 index 00000000000..cfb88bd80d6 --- /dev/null +++ b/test/Transforms/ScalarRepl/only-memcpy-uses.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -scalarrepl -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct.S = type { [12 x i32] } + +; CHECK: @bar4 +define void @bar4(%struct.S* byval %s) nounwind ssp { +entry: +; CHECK: alloca +; CHECK-NOT: load +; CHECK: memcpy + %t = alloca %struct.S, align 4 + %agg.tmp = alloca %struct.S, align 4 + %tmp = bitcast %struct.S* %t to i8* + %tmp1 = bitcast %struct.S* %s to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false) + %tmp2 = bitcast %struct.S* %agg.tmp to i8* + %tmp3 = bitcast %struct.S* %t to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false) + %call = call i32 (...)* @bazz(%struct.S* byval %agg.tmp) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +declare i32 @bazz(...)