From 9827b78b51f285e90c2b1e5add9b28d10c88595c Mon Sep 17 00:00:00 2001
From: Cameron Zwarich
Date: Tue, 29 Mar 2011 05:19:52 +0000
Subject: [PATCH] Do some simple copy propagation through integer loads and
 stores when promoting vector types. This helps a lot with inlined functions
 when using the ARM soft float ABI. Fixes .

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128453 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Scalar/ScalarReplAggregates.cpp          | 23 ++++----
 test/Transforms/ScalarRepl/inline-vector.ll  | 53 +++++++++++++++++++
 test/Transforms/ScalarRepl/vector_promote.ll |  2 +-
 3 files changed, 68 insertions(+), 10 deletions(-)
 create mode 100644 test/Transforms/ScalarRepl/inline-vector.ll

diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index de7bb41e7de..191b667ea30 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -252,7 +252,7 @@ public:
 
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset);
+  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
   bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
 
@@ -315,7 +315,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 /// large) integer type with extract and insert operations where the loads
 /// and stores would mutate the memory.  We mark this by setting VectorTy
 /// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+                                      bool IsLoadOrStore) {
   // If we already decided to turn this into a blob of integer memory, there is
   // nothing to be done.
   if (VectorTy && VectorTy->isVoidTy())
@@ -331,10 +332,14 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
   } else if (In->isFloatTy() || In->isDoubleTy() ||
              (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
               isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+    // Full width accesses can be ignored, because they can always be turned
+    // into bitcasts.
+    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+    if (IsLoadOrStore && EltSize == AllocaSize)
+      return;
     // If we're accessing something that could be an element of a vector, see
     // if the implied vector agrees with what we already have and if Offset is
     // compatible with it.
-    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
     if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
         (VectorTy == 0 ||
          cast<VectorType>(VectorTy)->getElementType()
@@ -442,7 +447,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (LI->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(LI->getType(), Offset);
+      MergeInType(LI->getType(), Offset, true);
       continue;
     }
 
@@ -453,7 +458,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (SI->getOperand(0)->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(SI->getOperand(0)->getType(), Offset);
+      MergeInType(SI->getOperand(0)->getType(), Offset, true);
       continue;
     }
 
@@ -691,11 +696,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
   if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
-    if (ToType->isVectorTy()) {
-      unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-      if (ToTypeSize == AllocaSize)
-        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+    if (ToTypeSize == AllocaSize)
+      return Builder.CreateBitCast(FromVal, ToType, "tmp");
 
+    if (ToType->isVectorTy()) {
       assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
              "Partial vector access of an alloca must have a power-of-2 size "
              "ratio.");
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
new file mode 100644
index 00000000000..2f51cc7cf59
--- /dev/null
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%struct.Vector4 = type { float, float, float, float }
+@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
+
+; CHECK: define void @f
+; CHECK-NOT: alloca
+; CHECK: phi <4 x float>
+
+define void @f() nounwind ssp {
+entry:
+  %i = alloca i32, align 4
+  %vector = alloca %struct.Vector4, align 16
+  %agg.tmp = alloca %struct.Vector4, align 16
+  %tmp = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %storemerge, i32* %i, align 4
+  %cmp = icmp slt i32 %storemerge, 1000000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+  %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
+  %1 = load [2 x i64]* %0, align 16
+  %tmp2.i = extractvalue [2 x i64] %1, 0
+  %tmp3.i = zext i64 %tmp2.i to i128
+  %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
+  %sub.i.i = fsub <4 x float> , %tmp10.i
+  %2 = bitcast %struct.Vector4* %vector to <4 x float>*
+  store <4 x float> %sub.i.i, <4 x float>* %2, align 16
+  %tmp4 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp4, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %x = getelementptr inbounds %struct.Vector4* %vector, i32 0, i32 0
+  %tmp5 = load float* %x, align 16
+  %conv = fpext float %tmp5 to double
+  %call = call i32 (...)* @printf(double %conv) nounwind
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare i32 @printf(...)
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index c51c9247f54..ef701c621da 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -94,7 +94,7 @@ define i64 @test6(<2 x float> %X) {
   %tmp = load i64* %P
   ret i64 %tmp
 ; CHECK: @test6
-; CHECK: bitcast <2 x float> %X to <1 x i64>
+; CHECK: bitcast <2 x float> %X to i64
 ; CHECK: ret i64
 }
 
-- 
2.34.1
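
Illustrative note, not part of the patch: a minimal before/after sketch of the pattern the new IsLoadOrStore parameter targets, written in the same LLVM 2.9-era typed-pointer IR syntax as the tests above. The function @example and its value names are hypothetical, and the "after" snippet is an approximation of what scalarrepl is expected to produce rather than verified compiler output. With the full-width check in MergeInType, an i128 load or store that covers the whole <4 x float> alloca no longer forces the merged type away from the vector type, and ConvertScalar_ExtractValue rewrites the full-width access as a plain bitcast.

    ; Before scalarrepl: a full-width i128 load of a <4 x float> alloca.
    define <4 x float> @example(<4 x float> %v) {
    entry:
      %a = alloca <4 x float>, align 16
      store <4 x float> %v, <4 x float>* %a, align 16
      %p = bitcast <4 x float>* %a to i128*
      %i = load i128* %p, align 16
      %r = bitcast i128 %i to <4 x float>
      ret <4 x float> %r
    }

    ; After scalarrepl (sketch): the alloca is promoted as <4 x float> and the
    ; full-width i128 access becomes a bitcast of the stored value, which later
    ; passes can fold away entirely.
    define <4 x float> @example(<4 x float> %v) {
    entry:
      %i = bitcast <4 x float> %v to i128
      %r = bitcast i128 %i to <4 x float>
      ret <4 x float> %r
    }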