Limiting gep merging to fix the performance problem described in

author Wei Mi <wmi@google.com>

Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)

committer Wei Mi <wmi@google.com>

Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)
author Wei Mi <wmi@google.com>
Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)
committer Wei Mi <wmi@google.com>
Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp

index 3b46156c1ab0f7eab39d10a5785777d09f06557c..f62941fbf64268262cb4fad0966a6ffa91dd3fb3 100644 (file)
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1467,6 +1467,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
          // normalized.
          if (SO1->getType() != GO1->getType())
            return nullptr;
+        // Only do the combine when GO1 and SO1 are both constants. Only in
+        // this case, we are sure the cost after the merge is never more than
+        // that before the merge.
+        if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
+          return nullptr;
          Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
        }
  
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll

deleted file mode 100644 (file)

index 4347be4..0000000
--- a/test/Transforms/InstCombine/descale-zero.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-define internal i8* @descale_zero() {
-entry:
-; CHECK: load i8*, i8** inttoptr (i64 48 to i8**), align 16
-; CHECK-NEXT: ret i8*
-  %i16_ptr = load i16*, i16** inttoptr (i64 48 to i16**), align 16
-  %num = load i64, i64* inttoptr (i64 64 to i64*), align 64
-  %num_times_2 = shl i64 %num, 1
-  %num_times_2_plus_4 = add i64 %num_times_2, 4
-  %i8_ptr = bitcast i16* %i16_ptr to i8*
-  %i8_ptr_num_times_2_plus_4 = getelementptr i8, i8* %i8_ptr, i64 %num_times_2_plus_4
-  %num_times_neg2 = mul i64 %num, -2
-  %num_times_neg2_minus_4 = add i64 %num_times_neg2, -4
-  %addr = getelementptr i8, i8* %i8_ptr_num_times_2_plus_4, i64 %num_times_neg2_minus_4
-  ret i8* %addr
-}
diff --git a/test/Transforms/InstCombine/gep-merge.ll b/test/Transforms/InstCombine/gep-merge.ll

new file mode 100644 (file)

index 0000000..3cfe781
--- /dev/null
+++ b/test/Transforms/InstCombine/gep-merge.ll
@@ -0,0 +1,48 @@
+; This test makes sure that gep(gep ...) merge doesn't come into effect.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure there are no geps being merged.
+; CHECK-LABEL: @fn3(
+; CHECK: getelementptr
+; CHECK: getelementptr
+; CHECK: getelementptr
+
+@_ZN2cv1aE = global i8* zeroinitializer, align 8
+declare i32 @fn1() #2
+declare i32 @fn2() #2
+
+; Function Attrs: uwtable
+define linkonce_odr i32 @fn3() {
+entry:
+  %call = call i32 @fn1()
+  %call1 = call i32 @fn2()
+  %0 = load i8*, i8** @_ZN2cv1aE, align 8
+  %idx.ext2 = sext i32 %call1 to i64
+  %add.ptr3 = getelementptr inbounds i8, i8* %0, i64 %idx.ext2
+  br label %for.cond5
+
+for.cond5:
+  %total1 = phi i32 [ 0, %entry ], [ %total2, %for.body7 ]
+  %x.1 = phi i32 [ 0, %entry ], [ %inc, %for.body7 ]
+  %cmp6 = icmp slt i32 %x.1, %call
+  br i1 %cmp6, label %for.body7, label %for.cond34
+
+for.body7:                                        ; preds = %for.cond5
+  %mul = mul nsw i32 %x.1, 2
+  %idxprom = sext i32 %mul to i64
+  %arrayidx = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom
+  %1 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %1 to i32
+  %sub = sub nsw i32 %mul, 1
+  %idxprom10 = sext i32 %sub to i64
+  %arrayidx11 = getelementptr inbounds i8, i8* %add.ptr3, i64 %idxprom10
+  %2 = load i8, i8* %arrayidx11, align 1
+  %conv2 = zext i8 %2 to i32
+  %add1 = add nsw i32 %conv, %conv2
+  %total2 = add nsw i32 %total1, %add1
+  %inc = add nsw i32 %x.1, 1
+  br label %for.cond5
+
+for.cond34:
+  ret i32 %total1
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll

index 9bb1b12a6972405a2bbdb802be7430f5c59e37d2..276ada91f3c63223f5240106306246cbe526b6c0 100644 (file)
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -104,8 +104,8 @@ define i32* @test7(i32* %I, i64 %C, i64 %D) {
          %B = getelementptr i32, i32* %A, i64 %D
          ret i32* %B
  ; CHECK-LABEL: @test7(
-; CHECK: %A.sum = add i64 %C, %D
-; CHECK: getelementptr i32, i32* %I, i64 %A.sum
+; CHECK: %A = getelementptr i32, i32* %I, i64 %C
+; CHECK: %B = getelementptr i32, i32* %A, i64 %D
  }
  
  define i8* @test8([10 x i32]* %X) {
author	Wei Mi <wmi@google.com>
	Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)
committer	Wei Mi <wmi@google.com>
	Tue, 21 Apr 2015 22:37:09 +0000 (22:37 +0000)
lib/Transforms/InstCombine/InstructionCombining.cpp		patch \| blob \| history
test/Transforms/InstCombine/descale-zero.ll	[deleted file]	patch \| blob \| history
test/Transforms/InstCombine/gep-merge.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/InstCombine/getelementptr.ll		patch \| blob \| history