From a49e11047c766ae4177616bf764d518c0a0486a5 Mon Sep 17 00:00:00 2001
From: Jingyue Wu
Date: Thu, 13 Aug 2015 18:48:49 +0000
Subject: [PATCH] [SeparateConstOffsetFromGEP] strengthen the inbounds attribute

We used to be over-conservative about preserving inbounds. Actually, the
second GEP (which applies the constant offset) can inherit the inbounds
attribute of the original GEP, because the resultant pointer is equivalent
to that of the original GEP. For example,

  x = GEP inbounds a, i+5
    =>
  y = GEP a, i          // inbounds removed
  x = GEP inbounds y, 5 // inbounds preserved

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244937 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp     | 13 +++++++++----
 ...-gep-and-gvn-addrspace-addressing-modes.ll | 14 +++++++-------
 .../NVPTX/split-gep-and-gvn.ll                | 24 ++++++++++++------------
 .../NVPTX/split-gep.ll                        | 16 ++++++++--------
 .../NVPTX/value-tracking-domtree.ll           |  2 +-
 .../reassociate-geps-and-slsr-addrspace.ll    |  4 ++--
 6 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4a875311881..ad86c8c2828 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -891,13 +891,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // Clear the inbounds attribute because the new index may be off-bound.
   // e.g.,
   //
-  // b = add i64 a, 5
-  // addr = gep inbounds float* p, i64 b
+  //   b     = add i64 a, 5
+  //   addr  = gep inbounds float, float* p, i64 b
   //
   // is transformed to:
   //
-  // addr2 = gep float* p, i64 a
-  // addr = gep float* addr2, i64 5
+  //   addr2 = gep float, float* p, i64 a ; inbounds removed
+  //   addr  = gep inbounds float, float* addr2, i64 5
   //
   // If a is -4, although the old index b is in bounds, the new index a is
   // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -907,6 +907,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   //
   // TODO(jingyue): do some range analysis to keep as many inbounds as
   // possible. GEPs with inbounds are more friendly to alias analysis.
+  bool GEPWasInBounds = GEP->isInBounds();
   GEP->setIsInBounds(false);

   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -968,6 +969,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
     NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                        ConstantInt::get(IntPtrTy, Index, true),
                                        GEP->getName(), GEP);
+    // Inherit the inbounds attribute of the original GEP.
+    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
   } else {
     // Unlikely but possible. For example,
     // #pragma pack(1)
@@ -990,6 +993,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
         Type::getInt8Ty(GEP->getContext()), NewGEP,
         ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
         GEP);
+    // Inherit the inbounds attribute of the original GEP.
+    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
     if (GEP->getType() != I8PtrTy)
       NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
   }
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 527634db0f5..6f117697dde 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
 ; IR-LABEL: @sum_of_array(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
 define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
   %tmp = sext i32 %y to i64
   %tmp1 = sext i32 %x to i64
@@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)
 ; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
 ; IR: add i32 %x, 256
 ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
 ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
 ; DS instructions have a larger immediate offset, so make sure these are OK.
 ; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
 define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
   %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
   %tmp4 = load float, float addrspace(3)* %tmp2, align 4
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 073313d40e7..a0410024f6e 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
 ; IR-LABEL: @sum_of_array(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33

 ; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
 ; the order of "sext" and "add" when computing the array indices. @sum_of_array
@@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
 ; IR-LABEL: @sum_of_array2(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33

 ; This function loads
@@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
 ; IR-LABEL: @sum_of_array3(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33

 ; This function loads
@@ -191,6 +191,6 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
 ; IR-LABEL: @sum_of_array4(
 ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 2fdd158a35e..eeeac196374 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -44,7 +44,7 @@ entry:
 ; CHECK: add i32 %j, -2
 ; CHECK: sext
 ; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32

 ; We should be able to trace into sext/zext if it can be distributed to both
 ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
@@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
 }
 ; CHECK-LABEL: @ext_add_no_overflow(
 ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33

 ; Verifies we handle nested sext/zext correctly.
 define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
@@ -110,7 +110,7 @@ entry:
 }
 ; CHECK-LABEL: @sext_or(
 ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32

 ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
 ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -125,7 +125,7 @@ entry:
 }
 ; CHECK-LABEL: @expr(
 ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
 ; CHECK: store i64 %b5, i64* %out

 ; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
@@ -143,7 +143,7 @@ entry:
 ; CHECK: sext i32
 ; CHECK: sext i32
 ; CHECK: sext i32
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8

 ; Verifies we handle "sub" correctly.
 define float* @sub(i64 %i, i64 %j) {
@@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) {
 ; CHECK-LABEL: @sub(
 ; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
 ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155

 %struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
@@ -173,7 +173,7 @@ entry:
 ; CHECK-LABEL: @packed_struct(
 ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
 ; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
-; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
 ; CHECK: bitcast i8* %uglygep to i64*

 ; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
@@ -272,7 +272,7 @@ entry:
   %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
 ; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
 ; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
+; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
 ; CHECK: bitcast
   ret %struct2* %ptr2
 ; CHECK-NEXT: ret
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
index d1a0f33d5a2..601ca529135 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
@@ -25,7 +25,7 @@ then:
   %or = or i64 %i, 3
   %p = getelementptr inbounds float, float* %input, i64 %or
 ; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr float, float* [[base]], i64 3
+; CHECK: getelementptr inbounds float, float* [[base]], i64 3
   ret float* %p

 exit:
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
index 278250a9c80..f2853aca698 100644
--- a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -57,10 +57,10 @@ bb:
 ; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
 ; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383

 ; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
 define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
 bb:
   %i2 = shl nsw i32 %i, 1
--
2.34.1
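Illustrative sketch (not part of the patch): the IR below uses made-up names (@f, %p, %i) and assumes the pass is run the way the RUN lines in these tests run it (opt -S -separate-const-offset-from-gep with an NVPTX or AMDGPU target and datalayout); real output value names and ordering may differ. Input with a constant folded into the GEP index:

  define float* @f(float* %p, i64 %i) {
    %idx = add i64 %i, 5
    %addr = getelementptr inbounds float, float* %p, i64 %idx
    ret float* %addr
  }

With this patch, the split form keeps inbounds only on the constant-offset GEP, which is what the commit message and the updated CHECK lines describe:

  define float* @f(float* %p, i64 %i) {
    %base = getelementptr float, float* %p, i64 %i             ; inbounds removed
    %addr = getelementptr inbounds float, float* %base, i64 5  ; inbounds preserved
    ret float* %addr
  }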