From: Chris Lattner <sabre@nondot.org>
Date: Tue, 22 Apr 2008 02:53:33 +0000 (+0000)
Subject: optimize "p != gep p, ..." better.  This allows us to compile
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=10c0d9179e596dd295d85cad468f6d68015f5a46;p=oota-llvm.git

optimize "p != gep p, ..." better.  This allows us to compile
getelementptr-seteq.ll into:

define i1 @test(i64 %X, %S* %P) {
	%C = icmp eq i64 %X, -1		; <i1> [#uses=1]
	ret i1 %C
}

instead of:

define i1 @test(i64 %X, %S* %P) {
	%A.idx.mask = and i64 %X, 4611686018427387903		; <i64> [#uses=1]
	%C = icmp eq i64 %A.idx.mask, 4611686018427387903		; <i1> [#uses=1]
	ret i1 %C
}

And fixes the second half of PR2235.  This speeds up the insertion sort
case by 45%, from 1.12s to 0.77s.  In practice, this will significantly
speed up for loops structured like:

for (double *P = Base + N; P != Base; --P)
  ...

Which happens frequently for C++ iterators.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50079 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 10627ea6e92..ab8ddaba2e8 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -4873,7 +4873,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   Value *Result = Constant::getNullValue(IntPtrTy);
 
   // Build a mask for high order bits.
-  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
 
   for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
@@ -4937,6 +4937,114 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   return Result;
 }
 
+
+/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of
+/// the *offset* implied by GEP to zero.  For example, if we have &A[i], we want
+/// to return 'i' for "icmp ne i, 0".  Note that, in general, indices can be
+/// complex, and scales are involved.  The above expression would also be legal
+/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).  This
+/// later form is less amenable to optimization though, and we are allowed to
+/// generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+/// 
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+                                          InstCombiner &IC) {
+//  return 0;
+  TargetData &TD = IC.getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+
+  // Check to see if this gep only has a single variable index.  If so, and if
+  // any constant indices are a multiple of its scale, then we can compute this
+  // in terms of the scale of the variable index.  For example, if the GEP
+  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+  // because the expression will cross zero at the same point.
+  unsigned i, e = GEP->getNumOperands();
+  int64_t Offset = 0;
+  for (i = 1; i != e; ++i, ++GTI) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+      // Compute the aggregate offset of constant indices.
+      if (CI->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+      } else {
+        uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+        Offset += Size*CI->getSExtValue();
+      }
+    } else {
+      // Found our variable index.
+      break;
+    }
+  }
+  
+  // If there are no variable indices, we must have a constant offset, just
+  // evaluate it the general way.
+  if (i == e) return 0;
+  
+  Value *VariableIdx = GEP->getOperand(i);
+  // Determine the scale factor of the variable element.  For example, this is
+  // 4 if the variable index is into an array of i32.
+  uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+  
+  // Verify that there are no other variable indices.  If so, emit the hard way.
+  for (++i, ++GTI; i != e; ++i, ++GTI) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!CI) return 0;
+   
+    // Compute the aggregate offset of constant indices.
+    if (CI->isZero()) continue;
+    
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+    } else {
+      uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+      Offset += Size*CI->getSExtValue();
+    }
+  }
+  
+  // Okay, we know we have a single variable index, which must be a
+  // pointer/array/vector index.  If there is no offset, life is simple, return
+  // the index.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  if (Offset == 0) {
+    // Cast to intptrty in case a truncation occurs.  If an extension is needed,
+    // we don't need to bother extending: the extension won't affect where the
+    // computation crosses zero.
+    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+      VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+                                  VariableIdx->getNameStart(), &I);
+    return VariableIdx;
+  }
+  
+  // Otherwise, there is an index.  The computation we will do will be modulo
+  // the pointer size, so get it.
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+  
+  Offset &= PtrSizeMask;
+  VariableScale &= PtrSizeMask;
+
+  // To do this transformation, any constant index must be a multiple of the
+  // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
+  // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
+  // multiple of the variable scale.
+  int64_t NewOffs = Offset / (int64_t)VariableScale;
+  if (Offset != NewOffs*(int64_t)VariableScale)
+    return 0;
+
+  // Okay, we can do this evaluation.  Start by converting the index to intptr.
+  const Type *IntPtrTy = TD.getIntPtrType();
+  if (VariableIdx->getType() != IntPtrTy)
+    VariableIdx = CastInst::createIntegerCast(VariableIdx, IntPtrTy,
+                                              true /*SExt*/, 
+                                              VariableIdx->getNameStart(), &I);
+  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+  return BinaryOperator::createAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
 /// FoldGEPICmp - Fold comparisons between a GEP instruction and something
 /// else.  At this point we know that the GEP is on the LHS of the comparison.
 Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
@@ -4944,15 +5052,20 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
                                        Instruction &I) {
   assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
 
-  if (CastInst *CI = dyn_cast<CastInst>(RHS))
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      RHS = CI->getOperand(0);
+  // Look through bitcasts.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+    RHS = BCI->getOperand(0);
 
   Value *PtrBase = GEPLHS->getOperand(0);
   if (PtrBase == RHS) {
     // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
-    // This transformation is valid because we know pointers can't overflow.
-    Value *Offset = EmitGEPOffset(GEPLHS, I, *this);
+    // This transformation (ignoring the base and scales) is valid because we
+    // know pointers can't overflow.  See if we can output an optimized form.
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+    
+    // If not, synthesize the offset the hard way.
+    if (Offset == 0)
+      Offset = EmitGEPOffset(GEPLHS, I, *this);
     return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
                         Constant::getNullValue(Offset->getType()));
   } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
diff --git a/test/Transforms/InstCombine/getelementptr-seteq.ll b/test/Transforms/InstCombine/getelementptr-seteq.ll
new file mode 100644
index 00000000000..05cec622b2a
--- /dev/null
+++ b/test/Transforms/InstCombine/getelementptr-seteq.ll
@@ -0,0 +1,13 @@
+; Test folding of constantexpr geps into normal geps.
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp eq i64 %X, -1}
+; PR2235
+
+%S = type { i32, [ 100 x i32] }
+
+define i1 @test(i64 %X, %S* %P) {
+        %A = getelementptr %S* %P, i32 0, i32 1, i64 %X
+        %B = getelementptr %S* %P, i32 0, i32 0
+	%C = icmp eq i32* %A, %B
+	ret i1 %C
+}
+