From 90f48e7c91a8faa875ba889ca66b137ffd46e34a Mon Sep 17 00:00:00 2001 From: Victor Hernandez Date: Wed, 28 Oct 2009 20:18:55 +0000 Subject: [PATCH] Extend getMallocArraySize() to determine the array size if the malloc argument is: ArraySize * ElementSize ElementSize * ArraySize ArraySize << log2(ElementSize) ElementSize << log2(ArraySize) Refactor isArrayMallocHelper and delete isSafeToGetMallocArraySize, so that there is only 1 copy of the malloc array determining logic. Update users of getMallocArraySize() to not bother calling isArrayMalloc() as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85421 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/MemoryBuiltins.h | 25 +-- lib/Analysis/MemoryBuiltins.cpp | 208 +++++++++++------------- lib/Transforms/IPO/GlobalOpt.cpp | 20 +-- test/Transforms/GlobalOpt/heap-sra-3.ll | 41 +++++ test/Transforms/GlobalOpt/heap-sra-4.ll | 41 +++++ 5 files changed, 193 insertions(+), 142 deletions(-) create mode 100644 test/Transforms/GlobalOpt/heap-sra-3.ll create mode 100644 test/Transforms/GlobalOpt/heap-sra-4.ll diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 6395d29c680..5fd0bb09db1 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -43,15 +43,8 @@ const CallInst* extractMallocCallFromBitCast(const Value* I); CallInst* extractMallocCallFromBitCast(Value* I); /// isArrayMalloc - Returns the corresponding CallInst if the instruction -/// matches the malloc call IR generated by CallInst::CreateMalloc(). This -/// means that it is a malloc call with one bitcast use AND the malloc call's -/// size argument is: -/// 1. a constant not equal to the size of the malloced type -/// or -/// 2. the result of a multiplication by the size of the malloced type -/// Otherwise it returns NULL. -/// The unique bitcast is needed to determine the type/size of the array -/// allocation. +/// is a call to malloc whose array size can be determined and the array size +/// is not constant 1. Otherwise, return NULL. CallInst* isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD); const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context, const TargetData* TD); @@ -66,16 +59,10 @@ const PointerType* getMallocType(const CallInst* CI); /// unique bitcast use, then return NULL. const Type* getMallocAllocatedType(const CallInst* CI); -/// getMallocArraySize - Returns the array size of a malloc call. For array -/// mallocs, the size is computated in 1 of 3 ways: -/// 1. If the element type is of size 1, then array size is the argument to -/// malloc. -/// 2. Else if the malloc's argument is a constant, the array size is that -/// argument divided by the element type's size. -/// 3. Else the malloc argument must be a multiplication and the array size is -/// the first operand of the multiplication. -/// For non-array mallocs, the computed size is constant 1. -/// This function returns NULL for all mallocs whose array size cannot be +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. Value* getMallocArraySize(CallInst* CI, LLVMContext &Context, const TargetData* TD); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index cf2ad62b617..0982345cfa9 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -88,64 +88,119 @@ const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) { : NULL; } -static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, - const TargetData* TD) { +/// isConstantOne - Return true only if val is constant int 1. +static bool isConstantOne(Value *val) { + return isa(val) && cast(val)->isOne(); +} + +static Value* isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, + const TargetData* TD) { if (!CI) - return false; + return NULL; + // Type must be known to determine array size. const Type* T = getMallocAllocatedType(CI); - - // We can only indentify an array malloc if we know the type of the malloc - // call. - if (!T) return false; + if (!T) + return NULL; Value* MallocArg = CI->getOperand(1); - Constant *ElementSize = ConstantExpr::getSizeOf(T); + ConstantExpr* CO = dyn_cast(MallocArg); + BinaryOperator* BO = dyn_cast(MallocArg); + + Constant* ElementSize = ConstantExpr::getSizeOf(T); ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, MallocArg->getType()); - Constant *FoldedElementSize = ConstantFoldConstantExpression( - cast(ElementSize), - Context, TD); - - - if (isa(MallocArg)) - return (MallocArg != ElementSize); + Constant *FoldedElementSize = + ConstantFoldConstantExpression(cast(ElementSize), Context, TD); + + // First, check if CI is a non-array malloc. + if (CO && ((CO == ElementSize) || + (FoldedElementSize && (CO == FoldedElementSize)))) + // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. + return ConstantInt::get(MallocArg->getType(), 1); + + // Second, check if CI is an array malloc whose array size can be determined. + if (isConstantOne(ElementSize) || + (FoldedElementSize && isConstantOne(FoldedElementSize))) + return MallocArg; - BinaryOperator *BI = dyn_cast(MallocArg); - if (!BI) - return false; + if (!CO && !BO) + return NULL; - if (BI->getOpcode() == Instruction::Mul) - // ArraySize * ElementSize - if (BI->getOperand(1) == ElementSize || - (FoldedElementSize && BI->getOperand(1) == FoldedElementSize)) - return true; + Value* Op0 = NULL; + Value* Op1 = NULL; + unsigned Opcode = 0; + if (CO && ((CO->getOpcode() == Instruction::Mul) || + (CO->getOpcode() == Instruction::Shl))) { + Op0 = CO->getOperand(0); + Op1 = CO->getOperand(1); + Opcode = CO->getOpcode(); + } + if (BO && ((BO->getOpcode() == Instruction::Mul) || + (BO->getOpcode() == Instruction::Shl))) { + Op0 = BO->getOperand(0); + Op1 = BO->getOperand(1); + Opcode = BO->getOpcode(); + } - // TODO: Detect case where MallocArg mul has been transformed to shl. + // Determine array size if malloc's argument is the product of a mul or shl. + if (Op0) { + if (Opcode == Instruction::Mul) { + if ((Op1 == ElementSize) || + (FoldedElementSize && (Op1 == FoldedElementSize))) + // ArraySize * ElementSize + return Op0; + if ((Op0 == ElementSize) || + (FoldedElementSize && (Op0 == FoldedElementSize))) + // ElementSize * ArraySize + return Op1; + } + if (Opcode == Instruction::Shl) { + ConstantInt* Op1Int = dyn_cast(Op1); + if (!Op1Int) return NULL; + Value* Op1Pow = ConstantInt::get(Op1->getType(), + pow(2, Op1Int->getZExtValue())); + if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize)) + // ArraySize << log2(ElementSize) + return Op1Pow; + if (Op1Pow == ElementSize || + (FoldedElementSize && Op1Pow == FoldedElementSize)) + // ElementSize << log2(ArraySize) + return Op0; + } + } - return false; + // We could not determine the malloc array size from MallocArg. + return NULL; } /// isArrayMalloc - Returns the corresponding CallInst if the instruction -/// matches the malloc call IR generated by CallInst::CreateMalloc(). This -/// means that it is a malloc call with one bitcast use AND the malloc call's -/// size argument is: -/// 1. a constant not equal to the size of the malloced type -/// or -/// 2. the result of a multiplication by the size of the malloced type -/// Otherwise it returns NULL. -/// The unique bitcast is needed to determine the type/size of the array -/// allocation. +/// is a call to malloc whose array size can be determined and the array size +/// is not constant 1. Otherwise, return NULL. CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD) { CallInst *CI = extractMallocCall(I); - return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; + Value* ArraySize = isArrayMallocHelper(CI, Context, TD); + + if (ArraySize && + ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1)) + return CI; + + // CI is a non-array malloc or we can't figure out that it is an array malloc. + return NULL; } const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context, const TargetData* TD) { const CallInst *CI = extractMallocCall(I); - return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; + Value* ArraySize = isArrayMallocHelper(CI, Context, TD); + + if (ArraySize && + ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1)) + return CI; + + // CI is a non-array malloc or we can't figure out that it is an array malloc. + return NULL; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -183,85 +238,14 @@ const Type* llvm::getMallocAllocatedType(const CallInst* CI) { return PT ? PT->getElementType() : NULL; } -/// isSafeToGetMallocArraySize - Returns true if the array size of a malloc can -/// be determined. It can be determined in these 3 cases of malloc codegen: -/// 1. non-array malloc: The malloc's size argument is a constant and equals the /// size of the type being malloced. -/// 2. array malloc: This is a malloc call with one bitcast use AND the malloc -/// call's size argument is a constant multiple of the size of the malloced -/// type. -/// 3. array malloc: This is a malloc call with one bitcast use AND the malloc -/// call's size argument is the result of a multiplication by the size of the -/// malloced type. -/// Otherwise returns false. -static bool isSafeToGetMallocArraySize(const CallInst *CI, - LLVMContext &Context, - const TargetData* TD) { - if (!CI) - return false; - - // Type must be known to determine array size. - const Type* T = getMallocAllocatedType(CI); - if (!T) return false; - - Value* MallocArg = CI->getOperand(1); - Constant *ElementSize = ConstantExpr::getSizeOf(T); - ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, - MallocArg->getType()); - - // First, check if it is a non-array malloc. - if (isa(MallocArg) && (MallocArg == ElementSize)) - return true; - - // Second, check if it can be determined that this is an array malloc. - return isArrayMallocHelper(CI, Context, TD); -} - -/// isConstantOne - Return true only if val is constant int 1. -static bool isConstantOne(Value *val) { - return isa(val) && cast(val)->isOne(); -} - -/// getMallocArraySize - Returns the array size of a malloc call. For array -/// mallocs, the size is computated in 1 of 3 ways: -/// 1. If the element type is of size 1, then array size is the argument to -/// malloc. -/// 2. Else if the malloc's argument is a constant, the array size is that -/// argument divided by the element type's size. -/// 3. Else the malloc argument must be a multiplication and the array size is -/// the first operand of the multiplication. -/// For non-array mallocs, the computed size is constant 1. -/// This function returns NULL for all mallocs whose array size cannot be +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context, const TargetData* TD) { - if (!isSafeToGetMallocArraySize(CI, Context, TD)) - return NULL; - - // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. - if (!isArrayMalloc(CI, Context, TD)) - return ConstantInt::get(CI->getOperand(1)->getType(), 1); - - Value* MallocArg = CI->getOperand(1); - assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type"); - Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI)); - ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, - MallocArg->getType()); - - Constant* CO = dyn_cast(MallocArg); - BinaryOperator* BO = dyn_cast(MallocArg); - assert((isConstantOne(ElementSize) || CO || BO) && - "getMallocArraySize and malformed malloc IR"); - - if (isConstantOne(ElementSize)) - return MallocArg; - - if (CO) - return CO->getOperand(0); - - // TODO: Detect case where MallocArg mul has been transformed to shl. - - assert(BO && "getMallocArraySize not constant but not multiplication either"); - return BO->getOperand(0); + return isArrayMallocHelper(CI, Context, TD); } //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 3963fdea39b..f11230c494b 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -823,6 +823,7 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, + Value* NElems, LLVMContext &Context, TargetData* TD) { DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV @@ -830,9 +831,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, const Type *IntPtrTy = TD->getIntPtrType(Context); - Value* ArraySize = getMallocArraySize(CI, Context, TD); - assert(ArraySize && "not a malloc whose array size can be determined"); - ConstantInt *NElements = cast(ArraySize); + ConstantInt *NElements = cast(NElems); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. @@ -1275,15 +1274,14 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, - CallInst *CI, BitCastInst* BCI, + CallInst *CI, BitCastInst* BCI, + Value* NElems, LLVMContext &Context, - TargetData *TD){ + TargetData *TD) { DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI << " BITCAST = " << *BCI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast(MAT); - Value* ArraySize = getMallocArraySize(CI, Context, TD); - assert(ArraySize && "not a malloc whose array size can be determined"); // There is guaranteed to be at least one use of the malloc (storing // it into GV). If there are other uses, change them to be uses of @@ -1309,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, FieldGlobals.push_back(NGV); Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - FieldTy, ArraySize, + FieldTy, NElems, BCI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, BCI); @@ -1510,7 +1508,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD); return true; } @@ -1520,7 +1518,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!isArrayMalloc(CI, Context, TD)) + if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) if (const ArrayType *AT = dyn_cast(AllocTy)) AllocTy = AT->getElementType(); @@ -1547,7 +1545,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = extractMallocCallFromBitCast(NewMI); } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); + GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD); return true; } } diff --git a/test/Transforms/GlobalOpt/heap-sra-3.ll b/test/Transforms/GlobalOpt/heap-sra-3.ll new file mode 100644 index 00000000000..14964853c7c --- /dev/null +++ b/test/Transforms/GlobalOpt/heap-sra-3.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10" + + %struct.foo = type { i32, i32 } +@X = internal global %struct.foo* null +; CHECK: @X.f0 +; CHECK: @X.f1 + +define void @bar(i32 %Size) nounwind noinline { +entry: + %mallocsize = mul i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), %Size, ; [#uses=1] +; CHECK: mul i32 %Size + %malloccall = tail call i8* @malloc(i32 %mallocsize) ; [#uses=1] + %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] + store %struct.foo* %.sub, %struct.foo** @X, align 4 + ret void +} + +declare noalias i8* @malloc(i32) + +define i32 @baz() nounwind readonly noinline { +bb1.thread: + %0 = load %struct.foo** @X, align 4 + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] + %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] + %1 = getelementptr %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 + %2 = load i32* %1, align 4 + %3 = add i32 %2, %sum.0.reg2mem.0 + %indvar.next = add i32 %i.0.reg2mem.0, 1 + %exitcond = icmp eq i32 %indvar.next, 1200 + br i1 %exitcond, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + ret i32 %3 +} + diff --git a/test/Transforms/GlobalOpt/heap-sra-4.ll b/test/Transforms/GlobalOpt/heap-sra-4.ll new file mode 100644 index 00000000000..ae97ef1aadb --- /dev/null +++ b/test/Transforms/GlobalOpt/heap-sra-4.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + + %struct.foo = type { i32, i32 } +@X = internal global %struct.foo* null +; CHECK: @X.f0 +; CHECK: @X.f1 + +define void @bar(i32 %Size) nounwind noinline { +entry: + %mallocsize = shl i32 ptrtoint (%struct.foo* getelementptr (%struct.foo* null, i32 1) to i32), 9, ; [#uses=1] + %malloccall = tail call i8* @malloc(i32 %mallocsize) ; [#uses=1] +; CHECK: @malloc(i32 mul (i32 512 + %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] + store %struct.foo* %.sub, %struct.foo** @X, align 4 + ret void +} + +declare noalias i8* @malloc(i32) + +define i32 @baz() nounwind readonly noinline { +bb1.thread: + %0 = load %struct.foo** @X, align 4 + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] + %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] + %1 = getelementptr %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 + %2 = load i32* %1, align 4 + %3 = add i32 %2, %sum.0.reg2mem.0 + %indvar.next = add i32 %i.0.reg2mem.0, 1 + %exitcond = icmp eq i32 %indvar.next, 1200 + br i1 %exitcond, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + ret i32 %3 +} + -- 2.34.1