From: Chris Lattner <sabre@nondot.org>
Date: Sat, 22 Mar 2008 05:37:16 +0000 (+0000)
Subject: implement an initial hack at a straight-line store -> memset optimization.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b017c9e89cdd971a992a414e29caad0232b89a0d;p=oota-llvm.git

implement an initial hack at a straight-line store -> memset optimization.
This fires dozens of times across spec and multisource, but I don't know
if it actually speeds stuff up.  Hopefully the testers will show something
nice :)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48680 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f72fe8bad3e..a3a33237a36 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetData.h"
 using namespace llvm;
 
@@ -1036,11 +1037,63 @@ static Value *isBytewiseValue(Value *V) {
   return 0;
 }
 
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+                                  bool &VariableIdxFound, TargetData &TD) {
+  // Skip over the first indices.
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (unsigned i = 1; i != Idx; ++i, ++GTI)
+    /*skip along*/;
+
+  // Compute the offset implied by the rest of the indices.
+  int64_t Offset = 0;
+  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (OpC == 0)
+      return VariableIdxFound = true;
+    if (OpC->isZero()) continue;  // No offset.
+
+    // Handle struct indices, which add their field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+      continue;
+    }
+
+    // Otherwise, we have a sequential type like an array or vector.  Multiply
+    // the index by the ElementSize.
+    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+    Offset += Size*OpC->getSExtValue();
+  }
+
+  return Offset;
+}
+
 /// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
 /// plus the specified constant offset.  For example, Ptr1 might be &A[42], and
 /// Ptr2 might be &A[40] and Offset might be 8.
-static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
-  return true;
+static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
+                              TargetData &TD) {
+  // Right now we handle the case where Ptr1/Ptr2 are both GEPs with an
+  // identical base.  After that base, they may have some number of common
+  // (and potentially variable) indices.  Beyond those, they may differ by a
+  // constant offset, which determines their distance from each other.  We
+  // handle no other case at this point.
+  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+    return false;
+
+  // Skip any common indices and track the GEP types.
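+  // For example, given &A[i][42] and &A[i][40], the identical base and the
+  // common variable index i are skipped here; only the trailing constant
+  // indices feed the relative-offset computation below.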
+  unsigned Idx = 1;
+  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+      break;
+
+  bool VariableIdxFound = false;
+  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+  if (VariableIdxFound) return false;
+
+  return Offset1 == Offset2+(int64_t)Offset;
 }
 
 
@@ -1049,7 +1102,6 @@ static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset) {
 /// neighboring locations of memory.  If it sees enough consecutive ones
 /// (currently 4) it attempts to merge them together into a memcpy/memset.
 bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
-  return false;
   if (SI->isVolatile()) return false;
 
   // There are two cases that are interesting for this code to handle: memcpy
@@ -1078,7 +1130,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   SmallVector<StoreInst*, 16> Stores;
   Stores.push_back(SI);
 
-  BasicBlock::iterator BI = SI; ++BI;
+  BasicBlock::iterator BI = SI;
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
     if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
       // If the call is readnone, ignore it, otherwise bail out.  We don't even
@@ -1110,7 +1162,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
 
     // If so, check to see if the store is before the current range or after it;
     // in either case, extend the range, otherwise reject it.
-    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar)) {
+    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
       // Okay, this extends the stored area on the end, just add to the bytes
       // so far and remember this store.
       BytesSoFar += AccessSize;
@@ -1118,7 +1170,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
       continue;
     }
 
-    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize)) {
+    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
       // Okay, the store is before the current range.  Reset our start pointer
       // and get new alignment info etc.
       BytesSoFar += AccessSize;
@@ -1170,6 +1222,7 @@ bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
   };
   new CallInst(F, Ops, Ops+4, "", InsertPt);
 
+  // Zap all the stores.
   toErase.append(Stores.begin(), Stores.end());
   ++NumMemSetInfer;
 
diff --git a/test/Transforms/GVN/form-memset.ll b/test/Transforms/GVN/form-memset.ll
new file mode 100644
index 00000000000..a026cdef54f
--- /dev/null
+++ b/test/Transforms/GVN/form-memset.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | not grep store
+; RUN: llvm-as < %s | opt -gvn | llvm-dis | grep llvm.memset
+
+; All the stores in this example should be merged into a single memset.
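+; The nineteen i8 stores below fill bytes 0..18 of %x contiguously, so the
+; expected output is roughly one intrinsic call such as:
+;   call void @llvm.memset.i32(i8* %tmp, i8 %c, i32 19, i32 1)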
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @foo(i8 signext %c) nounwind {
+entry:
+  %x = alloca [19 x i8]   ; <[19 x i8]*> [#uses=20]
+  %tmp = getelementptr [19 x i8]* %x, i32 0, i32 0   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp, align 1
+  %tmp5 = getelementptr [19 x i8]* %x, i32 0, i32 1   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp5, align 1
+  %tmp9 = getelementptr [19 x i8]* %x, i32 0, i32 2   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp9, align 1
+  %tmp13 = getelementptr [19 x i8]* %x, i32 0, i32 3   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp13, align 1
+  %tmp17 = getelementptr [19 x i8]* %x, i32 0, i32 4   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp17, align 1
+  %tmp21 = getelementptr [19 x i8]* %x, i32 0, i32 5   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp21, align 1
+  %tmp25 = getelementptr [19 x i8]* %x, i32 0, i32 6   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp25, align 1
+  %tmp29 = getelementptr [19 x i8]* %x, i32 0, i32 7   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp29, align 1
+  %tmp33 = getelementptr [19 x i8]* %x, i32 0, i32 8   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp33, align 1
+  %tmp37 = getelementptr [19 x i8]* %x, i32 0, i32 9   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp37, align 1
+  %tmp41 = getelementptr [19 x i8]* %x, i32 0, i32 10   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp41, align 1
+  %tmp45 = getelementptr [19 x i8]* %x, i32 0, i32 11   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp45, align 1
+  %tmp49 = getelementptr [19 x i8]* %x, i32 0, i32 12   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp49, align 1
+  %tmp53 = getelementptr [19 x i8]* %x, i32 0, i32 13   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp53, align 1
+  %tmp57 = getelementptr [19 x i8]* %x, i32 0, i32 14   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp57, align 1
+  %tmp61 = getelementptr [19 x i8]* %x, i32 0, i32 15   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp61, align 1
+  %tmp65 = getelementptr [19 x i8]* %x, i32 0, i32 16   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp65, align 1
+  %tmp69 = getelementptr [19 x i8]* %x, i32 0, i32 17   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp69, align 1
+  %tmp73 = getelementptr [19 x i8]* %x, i32 0, i32 18   ; <i8*> [#uses=1]
+  store i8 %c, i8* %tmp73, align 1
+  %tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind   ; <i32> [#uses=0]
+  ret void
+}
+
+declare i32 @bar(...)
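
For reference, the C-level pattern this optimization targets looks roughly
like the sketch below (illustrative only; fill19 and the bar prototype are
made up to mirror form-memset.ll above).  A front end lowers the unrolled
assignments into the run of adjacent i8 stores shown in the test, which
processStore can now collapse into a single llvm.memset call:

    void bar(char *p);  /* opaque callee keeps x alive, as @bar does above */

    void fill19(char c) {
      char x[19];
      /* nineteen straight-line byte stores at consecutive offsets */
      x[0]  = c; x[1]  = c; x[2]  = c; x[3]  = c; x[4]  = c;
      x[5]  = c; x[6]  = c; x[7]  = c; x[8]  = c; x[9]  = c;
      x[10] = c; x[11] = c; x[12] = c; x[13] = c; x[14] = c;
      x[15] = c; x[16] = c; x[17] = c; x[18] = c;
      bar(x);
    }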