From: Chris Lattner Date: Tue, 22 Apr 2008 02:53:33 +0000 (+0000) Subject: optimize "p != gep p, ..." better. This allows us to compile X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=10c0d9179e596dd295d85cad468f6d68015f5a46;p=oota-llvm.git optimize "p != gep p, ..." better. This allows us to compile getelementptr-seteq.ll into: define i1 @test(i64 %X, %S* %P) { %C = icmp eq i64 %X, -1 ; [#uses=1] ret i1 %C } instead of: define i1 @test(i64 %X, %S* %P) { %A.idx.mask = and i64 %X, 4611686018427387903 ; [#uses=1] %C = icmp eq i64 %A.idx.mask, 4611686018427387903 ; [#uses=1] ret i1 %C } And fixes the second half of PR2235. This speeds up the insertion sort case by 45%, from 1.12s to 0.77s. In practice, this will significantly speed up for loops structured like: for (double *P = Base + N; P != Base; --P) ... Which happens frequently for C++ iterators. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50079 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 10627ea6e92..ab8ddaba2e8 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -4873,7 +4873,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { Value *Result = Constant::getNullValue(IntPtrTy); // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSize()*8; + unsigned IntPtrWidth = TD.getPointerSizeInBits(); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { @@ -4937,6 +4937,114 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { return Result; } + +/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of +/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want +/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be +/// complex, and scales are involved. The above expression would also be legal +/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This +/// later form is less amenable to optimization though, and we are allowed to +/// generate the first by knowing that pointer arithmetic doesn't overflow. +/// +/// If we can't emit an optimized form for this expression, this returns null. +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, + InstCombiner &IC) { +// return 0; + TargetData &TD = IC.getTargetData(); + gep_type_iterator GTI = gep_type_begin(GEP); + + // Check to see if this gep only has a single variable index. If so, and if + // any constant indices are a multiple of its scale, then we can compute this + // in terms of the scale of the variable index. For example, if the GEP + // implies an offset of "12 + i*4", then we can codegen this as "3 + i", + // because the expression will cross zero at the same point. + unsigned i, e = GEP->getNumOperands(); + int64_t Offset = 0; + for (i = 1; i != e; ++i, ++GTI) { + if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } else { + // Found our variable index. + break; + } + } + + // If there are no variable indices, we must have a constant offset, just + // evaluate it the general way. + if (i == e) return 0; + + Value *VariableIdx = GEP->getOperand(i); + // Determine the scale factor of the variable element. For example, this is + // 4 if the variable index is into an array of i32. + uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType()); + + // Verify that there are no other variable indices. If so, emit the hard way. + for (++i, ++GTI; i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast(GEP->getOperand(i)); + if (!CI) return 0; + + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } + + // Okay, we know we have a single variable index, which must be a + // pointer/array/vector index. If there is no offset, life is simple, return + // the index. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + if (Offset == 0) { + // Cast to intptrty in case a truncation occurs. If an extension is needed, + // we don't need to bother extending: the extension won't affect where the + // computation crosses zero. + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) + VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(), + VariableIdx->getNameStart(), &I); + return VariableIdx; + } + + // Otherwise, there is an index. The computation we will do will be modulo + // the pointer size, so get it. + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + Offset &= PtrSizeMask; + VariableScale &= PtrSizeMask; + + // To do this transformation, any constant index must be a multiple of the + // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", + // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a + // multiple of the variable scale. + int64_t NewOffs = Offset / (int64_t)VariableScale; + if (Offset != NewOffs*(int64_t)VariableScale) + return 0; + + // Okay, we can do this evaluation. Start by converting the index to intptr. + const Type *IntPtrTy = TD.getIntPtrType(); + if (VariableIdx->getType() != IntPtrTy) + VariableIdx = CastInst::createIntegerCast(VariableIdx, IntPtrTy, + true /*SExt*/, + VariableIdx->getNameStart(), &I); + Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); + return BinaryOperator::createAdd(VariableIdx, OffsetVal, "offset", &I); +} + + /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, @@ -4944,15 +5052,20 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, Instruction &I) { assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!"); - if (CastInst *CI = dyn_cast(RHS)) - if (isa(CI->getOperand(0)->getType())) - RHS = CI->getOperand(0); + // Look through bitcasts. + if (BitCastInst *BCI = dyn_cast(RHS)) + RHS = BCI->getOperand(0); Value *PtrBase = GEPLHS->getOperand(0); if (PtrBase == RHS) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). - // This transformation is valid because we know pointers can't overflow. - Value *Offset = EmitGEPOffset(GEPLHS, I, *this); + // This transformation (ignoring the base and scales) is valid because we + // know pointers can't overflow. See if we can output an optimized form. + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); + + // If not, synthesize the offset the hard way. + if (Offset == 0) + Offset = EmitGEPOffset(GEPLHS, I, *this); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) { diff --git a/test/Transforms/InstCombine/getelementptr-seteq.ll b/test/Transforms/InstCombine/getelementptr-seteq.ll new file mode 100644 index 00000000000..05cec622b2a --- /dev/null +++ b/test/Transforms/InstCombine/getelementptr-seteq.ll @@ -0,0 +1,13 @@ +; Test folding of constantexpr geps into normal geps. +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp eq i64 %X, -1} +; PR2235 + +%S = type { i32, [ 100 x i32] } + +define i1 @test(i64 %X, %S* %P) { + %A = getelementptr %S* %P, i32 0, i32 1, i64 %X + %B = getelementptr %S* %P, i32 0, i32 0 + %C = icmp eq i32* %A, %B + ret i1 %C +} +