From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 19 Jun 2015 17:39:03 +0000 (+0000)
Subject: AMDGPU: Fix some places missed in rename
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=826539160cb9cccfb5cf85e9c36cdd4500432e00;p=oota-llvm.git

AMDGPU: Fix some places missed in rename

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240143 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 152c7fcfcca..e6f6d0ffe8b 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -635,6 +635,6 @@ include "llvm/IR/IntrinsicsXCore.td"
 include "llvm/IR/IntrinsicsHexagon.td"
 include "llvm/IR/IntrinsicsNVVM.td"
 include "llvm/IR/IntrinsicsMips.td"
-include "llvm/IR/IntrinsicsR600.td"
+include "llvm/IR/IntrinsicsAMDGPU.td"
 include "llvm/IR/IntrinsicsBPF.td"
 include "llvm/IR/IntrinsicsSystemZ.td"
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
new file mode 100644
index 00000000000..50556673822
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -0,0 +1,85 @@
+//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the R600-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "r600" in {
+
+class R600ReadPreloadRegisterIntrinsic<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+    GCCBuiltin<name>;
+
+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
+  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
+  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
+  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
+}
+
+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_global_size">;
+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_local_size">;
+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_ngroups">;
+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_tgid">;
+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_tidig">;
+} // End TargetPrefix = "r600"
+
+let TargetPrefix = "AMDGPU" in {
+
+class AMDGPUReadPreloadRegisterIntrinsic<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+    GCCBuiltin<name>;
+
+def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+  // 1st parameter: Numerator
+  // 2nd parameter: Denominator
+  // 3rd parameter: Constant to select select between first and
+  //                second. (0 = first, 1 = second).
+  Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
+            [IntrNoMem]>;
+
+def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
+            [IntrNoMem]>;
+
+def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
+  Intrinsic<[llvm_anyfloat_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem]>;
+
+def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+            [IntrNoMem]>;
+
+def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
+  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
+
+def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
+  Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+                                       "__builtin_amdgpu_read_workdim">;
+
+} // End TargetPrefix = "AMDGPU"
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
deleted file mode 100644
index 50556673822..00000000000
--- a/include/llvm/IR/IntrinsicsR600.td
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the R600-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "r600" in {
-
-class R600ReadPreloadRegisterIntrinsic<string name>
-  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-    GCCBuiltin<name>;
-
-multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-}
-
-defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_global_size">;
-defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_local_size">;
-defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_ngroups">;
-defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_tgid">;
-defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_tidig">;
-} // End TargetPrefix = "r600"
-
-let TargetPrefix = "AMDGPU" in {
-
-class AMDGPUReadPreloadRegisterIntrinsic<string name>
-  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-    GCCBuiltin<name>;
-
-def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
-  // 1st parameter: Numerator
-  // 2nd parameter: Denominator
-  // 3rd parameter: Constant to select select between first and
-  //                second. (0 = first, 1 = second).
-  Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-            [IntrNoMem]>;
-
-def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
-  Intrinsic<[llvm_anyfloat_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-            [IntrNoMem]>;
-
-def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
-  Intrinsic<[llvm_anyfloat_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-            [IntrNoMem]>;
-
-def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
-  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
-            [IntrNoMem]>;
-
-def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
-  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
-  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
-  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
-  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
-
-def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
-  Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-
-def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
-                                       "__builtin_amdgpu_read_workdim">;
-
-} // End TargetPrefix = "AMDGPU"
diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg b/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..6baccf05fff
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
new file mode 100644
index 00000000000..f6f898fae21
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s
+
+; COMMON-LABEL: @test_sink_ptrtoint_asc(
+; ASC: addrspacecast
+; ASC-NOT: ptrtoint
+; ASC-NOT: inttoptr
+
+define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
+bb:
+  %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
+  %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tmp3 = sext i32 %tmp2 to i64
+  %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
+  %tmp5 = load float, float addrspace(1)* %tmp4, align 4
+  %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)*
+  %tmp7 = fcmp olt float %tmp5, 8.388608e+06
+  br i1 %tmp7, label %bb8, label %bb14
+
+bb8:                                              ; preds = %bb
+  %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1
+  %tmp10 = fmul float %tmp9, 0x3E74442D00000000
+  %tmp11 = fsub float -0.000000e+00, %tmp10
+  %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1
+  store float %tmp12, float addrspace(4)* %tmp6, align 4
+  %tmp13 = fsub float -0.000000e+00, %tmp12
+  br label %bb15
+
+bb14:                                             ; preds = %bb
+  store float 2.000000e+00, float addrspace(4)* %tmp6, align 4
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb8
+  %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ]
+  %tmp17 = fsub float -0.000000e+00, %tmp16
+  %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1
+  %tmp19 = fsub float 2.187500e-01, %tmp18
+  %tmp20 = fsub float 7.187500e-01, %tmp19
+  %tmp21 = fcmp ogt float %tmp5, 1.600000e+01
+  %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20
+  %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3
+  store float %tmp22, float addrspace(1)* %tmp23, align 4
+  ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/CodeGenPrepare/R600/lit.local.cfg b/test/Transforms/CodeGenPrepare/R600/lit.local.cfg
deleted file mode 100644
index 4086e8d681c..00000000000
--- a/test/Transforms/CodeGenPrepare/R600/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll
deleted file mode 100644
index f6f898fae21..00000000000
--- a/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s
-
-; COMMON-LABEL: @test_sink_ptrtoint_asc(
-; ASC: addrspacecast
-; ASC-NOT: ptrtoint
-; ASC-NOT: inttoptr
-
-define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
-bb:
-  %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
-  %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
-  %tmp3 = sext i32 %tmp2 to i64
-  %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
-  %tmp5 = load float, float addrspace(1)* %tmp4, align 4
-  %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)*
-  %tmp7 = fcmp olt float %tmp5, 8.388608e+06
-  br i1 %tmp7, label %bb8, label %bb14
-
-bb8:                                              ; preds = %bb
-  %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1
-  %tmp10 = fmul float %tmp9, 0x3E74442D00000000
-  %tmp11 = fsub float -0.000000e+00, %tmp10
-  %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1
-  store float %tmp12, float addrspace(4)* %tmp6, align 4
-  %tmp13 = fsub float -0.000000e+00, %tmp12
-  br label %bb15
-
-bb14:                                             ; preds = %bb
-  store float 2.000000e+00, float addrspace(4)* %tmp6, align 4
-  br label %bb15
-
-bb15:                                             ; preds = %bb14, %bb8
-  %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ]
-  %tmp17 = fsub float -0.000000e+00, %tmp16
-  %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1
-  %tmp19 = fsub float 2.187500e-01, %tmp18
-  %tmp20 = fsub float 7.187500e-01, %tmp19
-  %tmp21 = fcmp ogt float %tmp5, 1.600000e+01
-  %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20
-  %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3
-  store float %tmp22, float addrspace(1)* %tmp23, align 4
-  ret void
-}
-
-declare float @llvm.fma.f32(float, float, float) #1
-declare i32 @llvm.r600.read.tidig.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg b/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..6baccf05fff
--- /dev/null
+++ b/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
new file mode 100644
index 00000000000..e4301bbb06d
--- /dev/null
+++ b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -0,0 +1,104 @@
+; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
+
+; Mostly copied from x86 version.
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;    }
+;    return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i32 %a.addr.04, -1
+  %and = and i32 %sub, %a.addr.04
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;        mydata1 *= c;
+;        mydata2 *= (int)a;
+;    }
+;    return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+  %tobool9 = icmp eq i64 %a, 0
+  br i1 %tobool9, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.013, 1
+  %sub = add i64 %a.addr.010, -1
+  %and = and i64 %sub, %a.addr.010
+  %mul = mul nsw i32 %inc, %mydata1.addr.011
+  %conv = trunc i64 %and to i32
+  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+  %add2 = add i32 %add, %c.0.lcssa
+  ret i32 %add2
+}
diff --git a/test/Transforms/LoopIdiom/R600/lit.local.cfg b/test/Transforms/LoopIdiom/R600/lit.local.cfg
deleted file mode 100644
index 4086e8d681c..00000000000
--- a/test/Transforms/LoopIdiom/R600/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/LoopIdiom/R600/popcnt.ll b/test/Transforms/LoopIdiom/R600/popcnt.ll
deleted file mode 100644
index e4301bbb06d..00000000000
--- a/test/Transforms/LoopIdiom/R600/popcnt.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
-
-; Mostly copied from x86 version.
-
-;To recognize this pattern:
-;int popcount(unsigned long long a) {
-;    int c = 0;
-;    while (a) {
-;        c++;
-;        a &= a - 1;
-;    }
-;    return c;
-;}
-;
-
-; CHECK-LABEL: @popcount_i64
-; CHECK: entry
-; CHECK: llvm.ctpop.i64
-; CHECK: ret
-define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
-entry:
-  %tobool3 = icmp eq i64 %a, 0
-  br i1 %tobool3, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
-  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
-  %inc = add nsw i32 %c.05, 1
-  %sub = add i64 %a.addr.04, -1
-  %and = and i64 %sub, %a.addr.04
-  %tobool = icmp eq i64 %and, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
-  ret i32 %c.0.lcssa
-}
-
-; CHECK-LABEL: @popcount_i32
-; CHECK: entry
-; CHECK: llvm.ctpop.i32
-; CHECK: ret
-define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
-entry:
-  %tobool3 = icmp eq i32 %a, 0
-  br i1 %tobool3, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
-  %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
-  %inc = add nsw i32 %c.05, 1
-  %sub = add i32 %a.addr.04, -1
-  %and = and i32 %sub, %a.addr.04
-  %tobool = icmp eq i32 %and, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
-  ret i32 %c.0.lcssa
-}
-
-; To recognize this pattern:
-;int popcount(unsigned long long a, int mydata1, int mydata2) {
-;    int c = 0;
-;    while (a) {
-;        c++;
-;        a &= a - 1;
-;        mydata1 *= c;
-;        mydata2 *= (int)a;
-;    }
-;    return c + mydata1 + mydata2;
-;}
-
-; CHECK-LABEL: @popcount2
-; CHECK: entry
-; CHECK: llvm.ctpop.i64
-; CHECK: ret
-define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
-entry:
-  %tobool9 = icmp eq i64 %a, 0
-  br i1 %tobool9, label %while.end, label %while.body
-
-while.body:                                       ; preds = %entry, %while.body
-  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
-  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
-  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
-  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
-  %inc = add nsw i32 %c.013, 1
-  %sub = add i64 %a.addr.010, -1
-  %and = and i64 %sub, %a.addr.010
-  %mul = mul nsw i32 %inc, %mydata1.addr.011
-  %conv = trunc i64 %and to i32
-  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
-  %tobool = icmp eq i64 %and, 0
-  br i1 %tobool, label %while.end, label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
-  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
-  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
-  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
-  %add2 = add i32 %add, %c.0.lcssa
-  ret i32 %add2
-}
diff --git a/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg b/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..6baccf05fff
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
new file mode 100644
index 00000000000..9ed86f88147
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+; Simple 3-pair chain with loads and stores
+define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_3_3_3(
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double, double addrspace(3)* %a, align 8
+  %i1 = load double, double addrspace(3)* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
+  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
+  %i4 = load double, double addrspace(3)* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1_as_3_0_0(
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
+; CHECK: ret
+  %i0 = load double, double addrspace(3)* %a, align 8
+  %i1 = load double, double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
+  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+  %i4 = load double, double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_0_0_3(
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
+  %i3 = load double, double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+  %i4 = load double, double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
deleted file mode 100644
index 4086e8d681c..00000000000
--- a/test/Transforms/SLPVectorizer/R600/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/SLPVectorizer/R600/simplebb.ll b/test/Transforms/SLPVectorizer/R600/simplebb.ll
deleted file mode 100644
index 9ed86f88147..00000000000
--- a/test/Transforms/SLPVectorizer/R600/simplebb.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
-
-target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-
-
-; Simple 3-pair chain with loads and stores
-define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_3_3_3(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
-  %i0 = load double, double addrspace(3)* %a, align 8
-  %i1 = load double, double addrspace(3)* %b, align 8
-  %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
-  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
-  %i4 = load double, double addrspace(3)* %arrayidx4, align 8
-  %mul5 = fmul double %i3, %i4
-  store double %mul, double addrspace(3)* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
-  store double %mul5, double addrspace(3)* %arrayidx5, align 8
-  ret void
-}
-
-define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
-; CHECK-LABEL: @test1_as_3_0_0(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
-; CHECK: ret
-  %i0 = load double, double addrspace(3)* %a, align 8
-  %i1 = load double, double* %b, align 8
-  %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
-  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
-  %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
-  ret void
-}
-
-define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_0_0_3(
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
-  %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
-  %mul5 = fmul double %i3, %i4
-  store double %mul, double addrspace(3)* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
-  store double %mul5, double addrspace(3)* %arrayidx5, align 8
-  ret void
-}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..6baccf05fff
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
new file mode 100644
index 00000000000..527634db0f5
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -0,0 +1,94 @@
+; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+  %tmp = sext i32 %y to i64
+  %tmp1 = sext i32 %x to i64
+  %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
+  %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 1
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
+  %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 1
+  %tmp13 = sext i32 %tmp12 to i64
+  %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
+  %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
+  %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
+
+@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
+
+; Some of the indices go over the maximum mubuf offset, so don't split them.
+
+; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: add i32 %x, 256
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+  %tmp = sext i32 %y to i64
+  %tmp1 = sext i32 %x to i64
+  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
+  %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 255
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
+  %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 256
+  %tmp13 = sext i32 %tmp12 to i64
+  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
+  %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
+  %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
+
+
+@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4
+
+; DS instructions have a larger immediate offset, so make sure these are OK.
+; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
+  %tmp4 = load float, float addrspace(3)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 255
+  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6
+  %tmp10 = load float, float addrspace(3)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 4032
+  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y
+  %tmp16 = load float, float addrspace(3)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6
+  %tmp20 = load float, float addrspace(3)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg
deleted file mode 100644
index 4086e8d681c..00000000000
--- a/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
-
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll
deleted file mode 100644
index 527634db0f5..00000000000
--- a/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
-
-; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
-define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
-  %tmp = sext i32 %y to i64
-  %tmp1 = sext i32 %x to i64
-  %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
-  %tmp4 = load float, float addrspace(2)* %tmp2, align 4
-  %tmp5 = fadd float %tmp4, 0.000000e+00
-  %tmp6 = add i32 %y, 1
-  %tmp7 = sext i32 %tmp6 to i64
-  %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
-  %tmp10 = load float, float addrspace(2)* %tmp8, align 4
-  %tmp11 = fadd float %tmp5, %tmp10
-  %tmp12 = add i32 %x, 1
-  %tmp13 = sext i32 %tmp12 to i64
-  %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
-  %tmp16 = load float, float addrspace(2)* %tmp14, align 4
-  %tmp17 = fadd float %tmp11, %tmp16
-  %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
-  %tmp20 = load float, float addrspace(2)* %tmp18, align 4
-  %tmp21 = fadd float %tmp17, %tmp20
-  store float %tmp21, float addrspace(1)* %output, align 4
-  ret void
-}
-
-@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
-
-; Some of the indices go over the maximum mubuf offset, so don't split them.
-
-; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
-; IR: add i32 %x, 256
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
-  %tmp = sext i32 %y to i64
-  %tmp1 = sext i32 %x to i64
-  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
-  %tmp4 = load float, float addrspace(2)* %tmp2, align 4
-  %tmp5 = fadd float %tmp4, 0.000000e+00
-  %tmp6 = add i32 %y, 255
-  %tmp7 = sext i32 %tmp6 to i64
-  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
-  %tmp10 = load float, float addrspace(2)* %tmp8, align 4
-  %tmp11 = fadd float %tmp5, %tmp10
-  %tmp12 = add i32 %x, 256
-  %tmp13 = sext i32 %tmp12 to i64
-  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
-  %tmp16 = load float, float addrspace(2)* %tmp14, align 4
-  %tmp17 = fadd float %tmp11, %tmp16
-  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
-  %tmp20 = load float, float addrspace(2)* %tmp18, align 4
-  %tmp21 = fadd float %tmp17, %tmp20
-  store float %tmp21, float addrspace(1)* %output, align 4
-  ret void
-}
-
-
-@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4
-
-; DS instructions have a larger immediate offset, so make sure these are OK.
-; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
-define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
-  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
-  %tmp4 = load float, float addrspace(3)* %tmp2, align 4
-  %tmp5 = fadd float %tmp4, 0.000000e+00
-  %tmp6 = add i32 %y, 255
-  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6
-  %tmp10 = load float, float addrspace(3)* %tmp8, align 4
-  %tmp11 = fadd float %tmp5, %tmp10
-  %tmp12 = add i32 %x, 4032
-  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y
-  %tmp16 = load float, float addrspace(3)* %tmp14, align 4
-  %tmp17 = fadd float %tmp11, %tmp16
-  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6
-  %tmp20 = load float, float addrspace(3)* %tmp18, align 4
-  %tmp21 = fadd float %tmp17, %tmp20
-  store float %tmp21, float addrspace(1)* %output, align 4
-  ret void
-}
diff --git a/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll
new file mode 100644
index 00000000000..5b279949464
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll
@@ -0,0 +1,249 @@
+; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+
+
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]]
+; SI-NEXT: ret i64 [[SEL]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]]
+; SI-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]]
+; SI-NEXT: ret i16 [[SEL]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]]
+; SI-NEXT: ret i64 [[SEL]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]]
+; SI-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]]
+; SI-NEXT: ret i16 [[SEL]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1c(i64 %A) {
+; ALL-LABEL: @test1c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]]
+; ALL-NEXT: ret i64 [[SEL]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2c(i32 %A) {
+; ALL-LABEL: @test2c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]]
+; ALL-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3c(i16 signext %A) {
+; ALL-LABEL: @test3c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]]
+; ALL-NEXT: ret i16 [[SEL]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1d(i64 %A) {
+; ALL-LABEL: @test1d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]]
+; ALL-NEXT: ret i64 [[SEL]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2d(i32 %A) {
+; ALL-LABEL: @test2d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]]
+; ALL-NEXT: ret i32 [[SEL]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3d(i16 signext %A) {
+; ALL-LABEL: @test3d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]]
+; ALL-NEXT: ret i16 [[SEL]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg b/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..2a665f06be7
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll
deleted file mode 100644
index 5b279949464..00000000000
--- a/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll
+++ /dev/null
@@ -1,249 +0,0 @@
-; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-
-
-define i64 @test1(i64 %A) {
-; ALL-LABEL: @test1(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]]
-; SI-NEXT: ret i64 [[SEL]]
-entry:
-  %tobool = icmp eq i64 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
-  ret i64 %cond
-}
-
-
-define i32 @test2(i32 %A) {
-; ALL-LABEL: @test2(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]]
-; SI-NEXT: ret i32 [[SEL]]
-entry:
-  %tobool = icmp eq i32 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
-  ret i32 %cond
-}
-
-
-define signext i16 @test3(i16 signext %A) {
-; ALL-LABEL: @test3(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]]
-; SI-NEXT: ret i16 [[SEL]]
-entry:
-  %tobool = icmp eq i16 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
-  ret i16 %cond
-}
-
-
-define i64 @test1b(i64 %A) {
-; ALL-LABEL: @test1b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]]
-; SI-NEXT: ret i64 [[SEL]]
-entry:
-  %tobool = icmp eq i64 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
-  ret i64 %cond
-}
-
-
-define i32 @test2b(i32 %A) {
-; ALL-LABEL: @test2b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]]
-; SI-NEXT: ret i32 [[SEL]]
-entry:
-  %tobool = icmp eq i32 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
-  ret i32 %cond
-}
-
-
-define signext i16 @test3b(i16 signext %A) {
-; ALL-LABEL: @test3b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]]
-; SI-NEXT: ret i16 [[SEL]]
-entry:
-  %tobool = icmp eq i16 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
-  ret i16 %cond
-}
-
-
-define i64 @test1c(i64 %A) {
-; ALL-LABEL: @test1c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]]
-; ALL-NEXT: ret i64 [[SEL]]
-entry:
-  %tobool = icmp eq i64 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
-  ret i64 %cond
-}
-
-define i32 @test2c(i32 %A) {
-; ALL-LABEL: @test2c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]]
-; ALL-NEXT: ret i32 [[SEL]]
-entry:
-  %tobool = icmp eq i32 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
-  ret i32 %cond
-}
-
-
-define signext i16 @test3c(i16 signext %A) {
-; ALL-LABEL: @test3c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]]
-; ALL-NEXT: ret i16 [[SEL]]
-entry:
-  %tobool = icmp eq i16 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
-  ret i16 %cond
-}
-
-
-define i64 @test1d(i64 %A) {
-; ALL-LABEL: @test1d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]]
-; ALL-NEXT: ret i64 [[SEL]]
-entry:
-  %tobool = icmp eq i64 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
-  ret i64 %cond
-}
-
-
-define i32 @test2d(i32 %A) {
-; ALL-LABEL: @test2d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]]
-; ALL-NEXT: ret i32 [[SEL]]
-entry:
-  %tobool = icmp eq i32 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
-  ret i32 %cond
-}
-
-
-define signext i16 @test3d(i16 signext %A) {
-; ALL-LABEL: @test3d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]]
-; ALL-NEXT: ret i16 [[SEL]]
-entry:
-  %tobool = icmp eq i16 %A, 0
-  br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true:                                        ; preds = %entry
-  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-  br label %cond.end
-
-cond.end:                                         ; preds = %entry, %cond.true
-  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
-  ret i16 %cond
-}
-
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/Transforms/SimplifyCFG/R600/lit.local.cfg b/test/Transforms/SimplifyCFG/R600/lit.local.cfg
deleted file mode 100644
index ad9ce2541ef..00000000000
--- a/test/Transforms/SimplifyCFG/R600/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg b/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..2a665f06be7
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
new file mode 100644
index 00000000000..278250a9c80
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -0,0 +1,107 @@
+; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+
+; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
+; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
+; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
+define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
+bb:
+  %i2 = shl nsw i32 %i, 1
+  %j1 = add nsw i32 %i, 1023
+  %tmp = sext i32 %j1 to i64
+  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
+  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
+  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+  %j2 = add nsw i32 %i2, 1023
+  %tmp5 = sext i32 %j2 to i64
+  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
+  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
+  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
+; CHECK: %j1 = add nsw i32 %i, 1024
+; CHECK: %tmp = sext i32 %j1 to i64
+; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
+bb:
+  %i2 = shl nsw i32 %i, 1
+  %j1 = add nsw i32 %i, 1024
+  %tmp = sext i32 %j1 to i64
+  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
+  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
+  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+  %j2 = add nsw i32 %i2, 1024
+  %tmp5 = sext i32 %j2 to i64
+  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
+  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
+  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
+; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
+; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+
+; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
+; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
+bb:
+  %i2 = shl nsw i32 %i, 1
+  %j1 = add nsw i32 %i, 16383
+  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
+  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
+  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+  %j2 = add nsw i32 %i2, 16383
+  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
+  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
+  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
+; CHECK: %j1 = add nsw i32 %i, 16384
+; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+; CHECK: %j2 = add i32 %j1, %i
+; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
+bb:
+  %i2 = shl nsw i32 %i, 1
+  %j1 = add nsw i32 %i, 16384
+  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
+  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
+  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+  %j2 = add nsw i32 %i2, 16384
+  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
+  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
+  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+  ret void
+}
diff --git a/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg b/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg
deleted file mode 100644
index ad9ce2541ef..00000000000
--- a/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not 'R600' in config.root.targets:
-    config.unsupported = True
diff --git a/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll
deleted file mode 100644
index 278250a9c80..00000000000
--- a/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-
-; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
-; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
-; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
-define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
-bb:
-  %i2 = shl nsw i32 %i, 1
-  %j1 = add nsw i32 %i, 1023
-  %tmp = sext i32 %j1 to i64
-  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
-  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
-  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
-  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
-  %j2 = add nsw i32 %i2, 1023
-  %tmp5 = sext i32 %j2 to i64
-  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
-  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
-  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
-  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
-  ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
-; CHECK: %j1 = add nsw i32 %i, 1024
-; CHECK: %tmp = sext i32 %j1 to i64
-; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
-; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
-define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
-bb:
-  %i2 = shl nsw i32 %i, 1
-  %j1 = add nsw i32 %i, 1024
-  %tmp = sext i32 %j1 to i64
-  %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
-  %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
-  %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
-  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
-  %j2 = add nsw i32 %i2, 1024
-  %tmp5 = sext i32 %j2 to i64
-  %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
-  %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
-  %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
-  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
-  ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
-; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
-
-; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
-define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
-bb:
-  %i2 = shl nsw i32 %i, 1
-  %j1 = add nsw i32 %i, 16383
-  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
-  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
-  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
-  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
-  %j2 = add nsw i32 %i2, 16383
-  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
-  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
-  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
-  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
-  ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
-; CHECK: %j1 = add nsw i32 %i, 16384
-; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
-; CHECK: %j2 = add i32 %j1, %i
-; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
-define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
-bb:
-  %i2 = shl nsw i32 %i, 1
-  %j1 = add nsw i32 %i, 16384
-  %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
-  %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
-  %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
-  %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
-  %j2 = add nsw i32 %i2, 16384
-  %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
-  %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
-  %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
-  %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
-  store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
-  ret void
-}