From: Matt Arsenault Date: Fri, 19 Jun 2015 17:39:03 +0000 (+0000) Subject: AMDGPU: Fix some places missed in rename X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=826539160cb9cccfb5cf85e9c36cdd4500432e00;p=oota-llvm.git AMDGPU: Fix some places missed in rename git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240143 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 152c7fcfcca..e6f6d0ffe8b 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -635,6 +635,6 @@ include "llvm/IR/IntrinsicsXCore.td" include "llvm/IR/IntrinsicsHexagon.td" include "llvm/IR/IntrinsicsNVVM.td" include "llvm/IR/IntrinsicsMips.td" -include "llvm/IR/IntrinsicsR600.td" +include "llvm/IR/IntrinsicsAMDGPU.td" include "llvm/IR/IntrinsicsBPF.td" include "llvm/IR/IntrinsicsSystemZ.td" diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td new file mode 100644 index 00000000000..50556673822 --- /dev/null +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -0,0 +1,85 @@ +//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the R600-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "r600" in { + +class R600ReadPreloadRegisterIntrinsic + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + +multiclass R600ReadPreloadRegisterIntrinsic_xyz { + def _x : R600ReadPreloadRegisterIntrinsic; + def _y : R600ReadPreloadRegisterIntrinsic; + def _z : R600ReadPreloadRegisterIntrinsic; +} + +defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_global_size">; +defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_local_size">; +defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_ngroups">; +defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tgid">; +defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tidig">; +} // End TargetPrefix = "r600" + +let TargetPrefix = "AMDGPU" in { + +class AMDGPUReadPreloadRegisterIntrinsic + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + +def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">, + // 1st parameter: Numerator + // 2nd parameter: Denominator + // 3rd parameter: Constant to select select between first and + // second. (0 = first, 1 = second). + Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], + [IntrNoMem]>; + +def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">, + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], + [IntrNoMem]>; + +def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">, + Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + +def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem]>; + +def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + +def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + +def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + +def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">, + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; + +def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">, + Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic < + "__builtin_amdgpu_read_workdim">; + +} // End TargetPrefix = "AMDGPU" diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td deleted file mode 100644 index 50556673822..00000000000 --- a/include/llvm/IR/IntrinsicsR600.td +++ /dev/null @@ -1,85 +0,0 @@ -//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the R600-specific intrinsics. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "r600" in { - -class R600ReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin; - -multiclass R600ReadPreloadRegisterIntrinsic_xyz { - def _x : R600ReadPreloadRegisterIntrinsic; - def _y : R600ReadPreloadRegisterIntrinsic; - def _z : R600ReadPreloadRegisterIntrinsic; -} - -defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_global_size">; -defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_local_size">; -defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_ngroups">; -defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_tgid">; -defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < - "__builtin_r600_read_tidig">; -} // End TargetPrefix = "r600" - -let TargetPrefix = "AMDGPU" in { - -class AMDGPUReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin; - -def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">, - // 1st parameter: Numerator - // 2nd parameter: Denominator - // 3rd parameter: Constant to select select between first and - // second. (0 = first, 1 = second). - Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem]>; - -def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">, - Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem]>; - -def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">, - Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - -def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">, - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem]>; - -def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">, - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">, - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">, - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - -def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">, - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; - -def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">, - Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; - -def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic < - "__builtin_amdgpu_read_workdim">; - -} // End TargetPrefix = "AMDGPU" diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg b/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..6baccf05fff --- /dev/null +++ b/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll new file mode 100644 index 00000000000..f6f898fae21 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s + +; COMMON-LABEL: @test_sink_ptrtoint_asc( +; ASC: addrspacecast +; ASC-NOT: ptrtoint +; ASC-NOT: inttoptr + +define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 { +bb: + %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16 + %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp3 = sext i32 %tmp2 to i64 + %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3 + %tmp5 = load float, float addrspace(1)* %tmp4, align 4 + %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)* + %tmp7 = fcmp olt float %tmp5, 8.388608e+06 + br i1 %tmp7, label %bb8, label %bb14 + +bb8: ; preds = %bb + %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1 + %tmp10 = fmul float %tmp9, 0x3E74442D00000000 + %tmp11 = fsub float -0.000000e+00, %tmp10 + %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1 + store float %tmp12, float addrspace(4)* %tmp6, align 4 + %tmp13 = fsub float -0.000000e+00, %tmp12 + br label %bb15 + +bb14: ; preds = %bb + store float 2.000000e+00, float addrspace(4)* %tmp6, align 4 + br label %bb15 + +bb15: ; preds = %bb14, %bb8 + %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ] + %tmp17 = fsub float -0.000000e+00, %tmp16 + %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1 + %tmp19 = fsub float 2.187500e-01, %tmp18 + %tmp20 = fsub float 7.187500e-01, %tmp19 + %tmp21 = fcmp ogt float %tmp5, 1.600000e+01 + %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20 + %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3 + store float %tmp22, float addrspace(1)* %tmp23, align 4 + ret void +} + +declare float @llvm.fma.f32(float, float, float) #1 +declare i32 @llvm.r600.read.tidig.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/Transforms/CodeGenPrepare/R600/lit.local.cfg b/test/Transforms/CodeGenPrepare/R600/lit.local.cfg deleted file mode 100644 index 4086e8d681c..00000000000 --- a/test/Transforms/CodeGenPrepare/R600/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True - diff --git a/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll deleted file mode 100644 index f6f898fae21..00000000000 --- a/test/Transforms/CodeGenPrepare/R600/no-sink-addrspacecast.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s - -; COMMON-LABEL: @test_sink_ptrtoint_asc( -; ASC: addrspacecast -; ASC-NOT: ptrtoint -; ASC-NOT: inttoptr - -define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 { -bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16 - %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1 - %tmp3 = sext i32 %tmp2 to i64 - %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3 - %tmp5 = load float, float addrspace(1)* %tmp4, align 4 - %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)* - %tmp7 = fcmp olt float %tmp5, 8.388608e+06 - br i1 %tmp7, label %bb8, label %bb14 - -bb8: ; preds = %bb - %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1 - %tmp10 = fmul float %tmp9, 0x3E74442D00000000 - %tmp11 = fsub float -0.000000e+00, %tmp10 - %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1 - store float %tmp12, float addrspace(4)* %tmp6, align 4 - %tmp13 = fsub float -0.000000e+00, %tmp12 - br label %bb15 - -bb14: ; preds = %bb - store float 2.000000e+00, float addrspace(4)* %tmp6, align 4 - br label %bb15 - -bb15: ; preds = %bb14, %bb8 - %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ] - %tmp17 = fsub float -0.000000e+00, %tmp16 - %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1 - %tmp19 = fsub float 2.187500e-01, %tmp18 - %tmp20 = fsub float 7.187500e-01, %tmp19 - %tmp21 = fcmp ogt float %tmp5, 1.600000e+01 - %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20 - %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3 - store float %tmp22, float addrspace(1)* %tmp23, align 4 - ret void -} - -declare float @llvm.fma.f32(float, float, float) #1 -declare i32 @llvm.r600.read.tidig.x() #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } diff --git a/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg b/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..6baccf05fff --- /dev/null +++ b/test/Transforms/LoopIdiom/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll new file mode 100644 index 00000000000..e4301bbb06d --- /dev/null +++ b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll @@ -0,0 +1,104 @@ +; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s + +; Mostly copied from x86 version. + +;To recognize this pattern: +;int popcount(unsigned long long a) { +; int c = 0; +; while (a) { +; c++; +; a &= a - 1; +; } +; return c; +;} +; + +; CHECK-LABEL: @popcount_i64 +; CHECK: entry +; CHECK: llvm.ctpop.i64 +; CHECK: ret +define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp { +entry: + %tobool3 = icmp eq i64 %a, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.05, 1 + %sub = add i64 %a.addr.04, -1 + %and = and i64 %sub, %a.addr.04 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + ret i32 %c.0.lcssa +} + +; CHECK-LABEL: @popcount_i32 +; CHECK: entry +; CHECK: llvm.ctpop.i32 +; CHECK: ret +define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp { +entry: + %tobool3 = icmp eq i32 %a, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.05, 1 + %sub = add i32 %a.addr.04, -1 + %and = and i32 %sub, %a.addr.04 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + ret i32 %c.0.lcssa +} + +; To recognize this pattern: +;int popcount(unsigned long long a, int mydata1, int mydata2) { +; int c = 0; +; while (a) { +; c++; +; a &= a - 1; +; mydata1 *= c; +; mydata2 *= (int)a; +; } +; return c + mydata1 + mydata2; +;} + +; CHECK-LABEL: @popcount2 +; CHECK: entry +; CHECK: llvm.ctpop.i64 +; CHECK: ret +define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp { +entry: + %tobool9 = icmp eq i64 %a, 0 + br i1 %tobool9, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ] + %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ] + %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.013, 1 + %sub = add i64 %a.addr.010, -1 + %and = and i64 %sub, %a.addr.010 + %mul = mul nsw i32 %inc, %mydata1.addr.011 + %conv = trunc i64 %and to i32 + %mul1 = mul nsw i32 %conv, %mydata2.addr.012 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ] + %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ] + %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa + %add2 = add i32 %add, %c.0.lcssa + ret i32 %add2 +} diff --git a/test/Transforms/LoopIdiom/R600/lit.local.cfg b/test/Transforms/LoopIdiom/R600/lit.local.cfg deleted file mode 100644 index 4086e8d681c..00000000000 --- a/test/Transforms/LoopIdiom/R600/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True - diff --git a/test/Transforms/LoopIdiom/R600/popcnt.ll b/test/Transforms/LoopIdiom/R600/popcnt.ll deleted file mode 100644 index e4301bbb06d..00000000000 --- a/test/Transforms/LoopIdiom/R600/popcnt.ll +++ /dev/null @@ -1,104 +0,0 @@ -; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s - -; Mostly copied from x86 version. - -;To recognize this pattern: -;int popcount(unsigned long long a) { -; int c = 0; -; while (a) { -; c++; -; a &= a - 1; -; } -; return c; -;} -; - -; CHECK-LABEL: @popcount_i64 -; CHECK: entry -; CHECK: llvm.ctpop.i64 -; CHECK: ret -define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp { -entry: - %tobool3 = icmp eq i64 %a, 0 - br i1 %tobool3, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] - %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ] - %inc = add nsw i32 %c.05, 1 - %sub = add i64 %a.addr.04, -1 - %and = and i64 %sub, %a.addr.04 - %tobool = icmp eq i64 %and, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] - ret i32 %c.0.lcssa -} - -; CHECK-LABEL: @popcount_i32 -; CHECK: entry -; CHECK: llvm.ctpop.i32 -; CHECK: ret -define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp { -entry: - %tobool3 = icmp eq i32 %a, 0 - br i1 %tobool3, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] - %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ] - %inc = add nsw i32 %c.05, 1 - %sub = add i32 %a.addr.04, -1 - %and = and i32 %sub, %a.addr.04 - %tobool = icmp eq i32 %and, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] - ret i32 %c.0.lcssa -} - -; To recognize this pattern: -;int popcount(unsigned long long a, int mydata1, int mydata2) { -; int c = 0; -; while (a) { -; c++; -; a &= a - 1; -; mydata1 *= c; -; mydata2 *= (int)a; -; } -; return c + mydata1 + mydata2; -;} - -; CHECK-LABEL: @popcount2 -; CHECK: entry -; CHECK: llvm.ctpop.i64 -; CHECK: ret -define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp { -entry: - %tobool9 = icmp eq i64 %a, 0 - br i1 %tobool9, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ] - %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ] - %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ] - %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ] - %inc = add nsw i32 %c.013, 1 - %sub = add i64 %a.addr.010, -1 - %and = and i64 %sub, %a.addr.010 - %mul = mul nsw i32 %inc, %mydata1.addr.011 - %conv = trunc i64 %and to i32 - %mul1 = mul nsw i32 %conv, %mydata2.addr.012 - %tobool = icmp eq i64 %and, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] - %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ] - %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ] - %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa - %add2 = add i32 %add, %c.0.lcssa - ret i32 %add2 -} diff --git a/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg b/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..6baccf05fff --- /dev/null +++ b/test/Transforms/SLPVectorizer/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll new file mode 100644 index 00000000000..9ed86f88147 --- /dev/null +++ b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll @@ -0,0 +1,65 @@ +; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" + + +; Simple 3-pair chain with loads and stores +define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { +; CHECK-LABEL: @test1_as_3_3_3( +; CHECK: load <2 x double>, <2 x double> addrspace(3)* +; CHECK: load <2 x double>, <2 x double> addrspace(3)* +; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % +; CHECK: ret + %i0 = load double, double addrspace(3)* %a, align 8 + %i1 = load double, double addrspace(3)* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 + %i3 = load double, double addrspace(3)* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1 + %i4 = load double, double addrspace(3)* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double addrspace(3)* %c, align 8 + %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 + store double %mul5, double addrspace(3)* %arrayidx5, align 8 + ret void +} + +define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { +; CHECK-LABEL: @test1_as_3_0_0( +; CHECK: load <2 x double>, <2 x double> addrspace(3)* +; CHECK: load <2 x double>, <2 x double>* +; CHECK: store <2 x double> %{{.*}}, <2 x double>* % +; CHECK: ret + %i0 = load double, double addrspace(3)* %a, align 8 + %i1 = load double, double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 + %i3 = load double, double addrspace(3)* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 + %i4 = load double, double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +} + +define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { +; CHECK-LABEL: @test1_as_0_0_3( +; CHECK: load <2 x double>, <2 x double>* +; CHECK: load <2 x double>, <2 x double>* +; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % +; CHECK: ret + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 + %i3 = load double, double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 + %i4 = load double, double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double addrspace(3)* %c, align 8 + %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 + store double %mul5, double addrspace(3)* %arrayidx5, align 8 + ret void +} diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg deleted file mode 100644 index 4086e8d681c..00000000000 --- a/test/Transforms/SLPVectorizer/R600/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True - diff --git a/test/Transforms/SLPVectorizer/R600/simplebb.ll b/test/Transforms/SLPVectorizer/R600/simplebb.ll deleted file mode 100644 index 9ed86f88147..00000000000 --- a/test/Transforms/SLPVectorizer/R600/simplebb.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s - -target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" - - -; Simple 3-pair chain with loads and stores -define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { -; CHECK-LABEL: @test1_as_3_3_3( -; CHECK: load <2 x double>, <2 x double> addrspace(3)* -; CHECK: load <2 x double>, <2 x double> addrspace(3)* -; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % -; CHECK: ret - %i0 = load double, double addrspace(3)* %a, align 8 - %i1 = load double, double addrspace(3)* %b, align 8 - %mul = fmul double %i0, %i1 - %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 - %i3 = load double, double addrspace(3)* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1 - %i4 = load double, double addrspace(3)* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - store double %mul, double addrspace(3)* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 - store double %mul5, double addrspace(3)* %arrayidx5, align 8 - ret void -} - -define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { -; CHECK-LABEL: @test1_as_3_0_0( -; CHECK: load <2 x double>, <2 x double> addrspace(3)* -; CHECK: load <2 x double>, <2 x double>* -; CHECK: store <2 x double> %{{.*}}, <2 x double>* % -; CHECK: ret - %i0 = load double, double addrspace(3)* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 - %i3 = load double, double addrspace(3)* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - store double %mul, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 - store double %mul5, double* %arrayidx5, align 8 - ret void -} - -define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { -; CHECK-LABEL: @test1_as_0_0_3( -; CHECK: load <2 x double>, <2 x double>* -; CHECK: load <2 x double>, <2 x double>* -; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % -; CHECK: ret - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - store double %mul, double addrspace(3)* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 - store double %mul5, double addrspace(3)* %arrayidx5, align 8 - ret void -} diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..6baccf05fff --- /dev/null +++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll new file mode 100644 index 00000000000..527634db0f5 --- /dev/null +++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll @@ -0,0 +1,94 @@ +; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4 + +; IR-LABEL: @sum_of_array( +; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} +; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1 +; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32 +; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33 +define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { + %tmp = sext i32 %y to i64 + %tmp1 = sext i32 %x to i64 + %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp + %tmp4 = load float, float addrspace(2)* %tmp2, align 4 + %tmp5 = fadd float %tmp4, 0.000000e+00 + %tmp6 = add i32 %y, 1 + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7 + %tmp10 = load float, float addrspace(2)* %tmp8, align 4 + %tmp11 = fadd float %tmp5, %tmp10 + %tmp12 = add i32 %x, 1 + %tmp13 = sext i32 %tmp12 to i64 + %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp + %tmp16 = load float, float addrspace(2)* %tmp14, align 4 + %tmp17 = fadd float %tmp11, %tmp16 + %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7 + %tmp20 = load float, float addrspace(2)* %tmp18, align 4 + %tmp21 = fadd float %tmp17, %tmp20 + store float %tmp21, float addrspace(1)* %output, align 4 + ret void +} + +@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4 + +; Some of the indices go over the maximum mubuf offset, so don't split them. + +; IR-LABEL: @sum_of_array_over_max_mubuf_offset( +; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} +; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255 +; IR: add i32 %x, 256 +; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} +; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} +define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { + %tmp = sext i32 %y to i64 + %tmp1 = sext i32 %x to i64 + %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp + %tmp4 = load float, float addrspace(2)* %tmp2, align 4 + %tmp5 = fadd float %tmp4, 0.000000e+00 + %tmp6 = add i32 %y, 255 + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7 + %tmp10 = load float, float addrspace(2)* %tmp8, align 4 + %tmp11 = fadd float %tmp5, %tmp10 + %tmp12 = add i32 %x, 256 + %tmp13 = sext i32 %tmp12 to i64 + %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp + %tmp16 = load float, float addrspace(2)* %tmp14, align 4 + %tmp17 = fadd float %tmp11, %tmp16 + %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7 + %tmp20 = load float, float addrspace(2)* %tmp18, align 4 + %tmp21 = fadd float %tmp17, %tmp20 + store float %tmp21, float addrspace(1)* %output, align 4 + ret void +} + + +@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4 + +; DS instructions have a larger immediate offset, so make sure these are OK. +; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset( +; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}} +; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255 +; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128 +; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383 +define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { + %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y + %tmp4 = load float, float addrspace(3)* %tmp2, align 4 + %tmp5 = fadd float %tmp4, 0.000000e+00 + %tmp6 = add i32 %y, 255 + %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6 + %tmp10 = load float, float addrspace(3)* %tmp8, align 4 + %tmp11 = fadd float %tmp5, %tmp10 + %tmp12 = add i32 %x, 4032 + %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y + %tmp16 = load float, float addrspace(3)* %tmp14, align 4 + %tmp17 = fadd float %tmp11, %tmp16 + %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6 + %tmp20 = load float, float addrspace(3)* %tmp18, align 4 + %tmp21 = fadd float %tmp17, %tmp20 + store float %tmp21, float addrspace(1)* %output, align 4 + ret void +} diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg deleted file mode 100644 index 4086e8d681c..00000000000 --- a/test/Transforms/SeparateConstOffsetFromGEP/R600/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True - diff --git a/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll deleted file mode 100644 index 527634db0f5..00000000000 --- a/test/Transforms/SeparateConstOffsetFromGEP/R600/split-gep-and-gvn-addrspace-addressing-modes.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s - -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" - -@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4 - -; IR-LABEL: @sum_of_array( -; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33 -define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { - %tmp = sext i32 %y to i64 - %tmp1 = sext i32 %x to i64 - %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp - %tmp4 = load float, float addrspace(2)* %tmp2, align 4 - %tmp5 = fadd float %tmp4, 0.000000e+00 - %tmp6 = add i32 %y, 1 - %tmp7 = sext i32 %tmp6 to i64 - %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7 - %tmp10 = load float, float addrspace(2)* %tmp8, align 4 - %tmp11 = fadd float %tmp5, %tmp10 - %tmp12 = add i32 %x, 1 - %tmp13 = sext i32 %tmp12 to i64 - %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp - %tmp16 = load float, float addrspace(2)* %tmp14, align 4 - %tmp17 = fadd float %tmp11, %tmp16 - %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7 - %tmp20 = load float, float addrspace(2)* %tmp18, align 4 - %tmp21 = fadd float %tmp17, %tmp20 - store float %tmp21, float addrspace(1)* %output, align 4 - ret void -} - -@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4 - -; Some of the indices go over the maximum mubuf offset, so don't split them. - -; IR-LABEL: @sum_of_array_over_max_mubuf_offset( -; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255 -; IR: add i32 %x, 256 -; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { - %tmp = sext i32 %y to i64 - %tmp1 = sext i32 %x to i64 - %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp - %tmp4 = load float, float addrspace(2)* %tmp2, align 4 - %tmp5 = fadd float %tmp4, 0.000000e+00 - %tmp6 = add i32 %y, 255 - %tmp7 = sext i32 %tmp6 to i64 - %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7 - %tmp10 = load float, float addrspace(2)* %tmp8, align 4 - %tmp11 = fadd float %tmp5, %tmp10 - %tmp12 = add i32 %x, 256 - %tmp13 = sext i32 %tmp12 to i64 - %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp - %tmp16 = load float, float addrspace(2)* %tmp14, align 4 - %tmp17 = fadd float %tmp11, %tmp16 - %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7 - %tmp20 = load float, float addrspace(2)* %tmp18, align 4 - %tmp21 = fadd float %tmp17, %tmp20 - store float %tmp21, float addrspace(1)* %output, align 4 - ret void -} - - -@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4 - -; DS instructions have a larger immediate offset, so make sure these are OK. -; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset( -; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383 -define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { - %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y - %tmp4 = load float, float addrspace(3)* %tmp2, align 4 - %tmp5 = fadd float %tmp4, 0.000000e+00 - %tmp6 = add i32 %y, 255 - %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6 - %tmp10 = load float, float addrspace(3)* %tmp8, align 4 - %tmp11 = fadd float %tmp5, %tmp10 - %tmp12 = add i32 %x, 4032 - %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y - %tmp16 = load float, float addrspace(3)* %tmp14, align 4 - %tmp17 = fadd float %tmp11, %tmp16 - %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6 - %tmp20 = load float, float addrspace(3)* %tmp18, align 4 - %tmp21 = fadd float %tmp17, %tmp20 - store float %tmp21, float addrspace(1)* %output, align 4 - ret void -} diff --git a/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll new file mode 100644 index 00000000000..5b279949464 --- /dev/null +++ b/test/Transforms/SimplifyCFG/AMDGPU/cttz-ctlz.ll @@ -0,0 +1,249 @@ +; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s +; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s + + +define i64 @test1(i64 %A) { +; ALL-LABEL: @test1( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]] +; SI-NEXT: ret i64 [[SEL]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] + ret i64 %cond +} + + +define i32 @test2(i32 %A) { +; ALL-LABEL: @test2( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]] +; SI-NEXT: ret i32 [[SEL]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] + ret i32 %cond +} + + +define signext i16 @test3(i16 signext %A) { +; ALL-LABEL: @test3( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]] +; SI-NEXT: ret i16 [[SEL]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] + ret i16 %cond +} + + +define i64 @test1b(i64 %A) { +; ALL-LABEL: @test1b( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]] +; SI-NEXT: ret i64 [[SEL]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] + ret i64 %cond +} + + +define i32 @test2b(i32 %A) { +; ALL-LABEL: @test2b( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]] +; SI-NEXT: ret i32 [[SEL]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] + ret i32 %cond +} + + +define signext i16 @test3b(i16 signext %A) { +; ALL-LABEL: @test3b( +; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) +; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]] +; SI-NEXT: ret i16 [[SEL]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] + ret i16 %cond +} + + +define i64 @test1c(i64 %A) { +; ALL-LABEL: @test1c( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]] +; ALL-NEXT: ret i64 [[SEL]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] + ret i64 %cond +} + +define i32 @test2c(i32 %A) { +; ALL-LABEL: @test2c( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]] +; ALL-NEXT: ret i32 [[SEL]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] + ret i32 %cond +} + + +define signext i16 @test3c(i16 signext %A) { +; ALL-LABEL: @test3c( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]] +; ALL-NEXT: ret i16 [[SEL]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] + ret i16 %cond +} + + +define i64 @test1d(i64 %A) { +; ALL-LABEL: @test1d( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]] +; ALL-NEXT: ret i64 [[SEL]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] + ret i64 %cond +} + + +define i32 @test2d(i32 %A) { +; ALL-LABEL: @test2d( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]] +; ALL-NEXT: ret i32 [[SEL]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] + ret i32 %cond +} + + +define signext i16 @test3d(i16 signext %A) { +; ALL-LABEL: @test3d( +; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) +; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]] +; ALL-NEXT: ret i16 [[SEL]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] + ret i16 %cond +} + + +declare i64 @llvm.ctlz.i64(i64, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i64 @llvm.cttz.i64(i64, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i16 @llvm.cttz.i16(i16, i1) diff --git a/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg b/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..2a665f06be7 --- /dev/null +++ b/test/Transforms/SimplifyCFG/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll deleted file mode 100644 index 5b279949464..00000000000 --- a/test/Transforms/SimplifyCFG/R600/cttz-ctlz.ll +++ /dev/null @@ -1,249 +0,0 @@ -; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s -; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s - - -define i64 @test1(i64 %A) { -; ALL-LABEL: @test1( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 -; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]] -; SI-NEXT: ret i64 [[SEL]] -entry: - %tobool = icmp eq i64 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] - ret i64 %cond -} - - -define i32 @test2(i32 %A) { -; ALL-LABEL: @test2( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 -; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]] -; SI-NEXT: ret i32 [[SEL]] -entry: - %tobool = icmp eq i32 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] - ret i32 %cond -} - - -define signext i16 @test3(i16 signext %A) { -; ALL-LABEL: @test3( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 -; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]] -; SI-NEXT: ret i16 [[SEL]] -entry: - %tobool = icmp eq i16 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] - ret i16 %cond -} - - -define i64 @test1b(i64 %A) { -; ALL-LABEL: @test1b( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 -; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]] -; SI-NEXT: ret i64 [[SEL]] -entry: - %tobool = icmp eq i64 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] - ret i64 %cond -} - - -define i32 @test2b(i32 %A) { -; ALL-LABEL: @test2b( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 -; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]] -; SI-NEXT: ret i32 [[SEL]] -entry: - %tobool = icmp eq i32 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] - ret i32 %cond -} - - -define signext i16 @test3b(i16 signext %A) { -; ALL-LABEL: @test3b( -; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 -; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) -; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]] -; SI-NEXT: ret i16 [[SEL]] -entry: - %tobool = icmp eq i16 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] - ret i16 %cond -} - - -define i64 @test1c(i64 %A) { -; ALL-LABEL: @test1c( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 -; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]] -; ALL-NEXT: ret i64 [[SEL]] -entry: - %tobool = icmp eq i64 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] - ret i64 %cond -} - -define i32 @test2c(i32 %A) { -; ALL-LABEL: @test2c( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 -; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]] -; ALL-NEXT: ret i32 [[SEL]] -entry: - %tobool = icmp eq i32 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] - ret i32 %cond -} - - -define signext i16 @test3c(i16 signext %A) { -; ALL-LABEL: @test3c( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 -; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]] -; ALL-NEXT: ret i16 [[SEL]] -entry: - %tobool = icmp eq i16 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] - ret i16 %cond -} - - -define i64 @test1d(i64 %A) { -; ALL-LABEL: @test1d( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 -; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]] -; ALL-NEXT: ret i64 [[SEL]] -entry: - %tobool = icmp eq i64 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] - ret i64 %cond -} - - -define i32 @test2d(i32 %A) { -; ALL-LABEL: @test2d( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 -; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]] -; ALL-NEXT: ret i32 [[SEL]] -entry: - %tobool = icmp eq i32 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] - ret i32 %cond -} - - -define signext i16 @test3d(i16 signext %A) { -; ALL-LABEL: @test3d( -; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 -; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) -; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]] -; ALL-NEXT: ret i16 [[SEL]] -entry: - %tobool = icmp eq i16 %A, 0 - br i1 %tobool, label %cond.end, label %cond.true - -cond.true: ; preds = %entry - %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) - br label %cond.end - -cond.end: ; preds = %entry, %cond.true - %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] - ret i16 %cond -} - - -declare i64 @llvm.ctlz.i64(i64, i1) -declare i32 @llvm.ctlz.i32(i32, i1) -declare i16 @llvm.ctlz.i16(i16, i1) -declare i64 @llvm.cttz.i64(i64, i1) -declare i32 @llvm.cttz.i32(i32, i1) -declare i16 @llvm.cttz.i16(i16, i1) diff --git a/test/Transforms/SimplifyCFG/R600/lit.local.cfg b/test/Transforms/SimplifyCFG/R600/lit.local.cfg deleted file mode 100644 index ad9ce2541ef..00000000000 --- a/test/Transforms/SimplifyCFG/R600/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg b/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..2a665f06be7 --- /dev/null +++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll new file mode 100644 index 00000000000..278250a9c80 --- /dev/null +++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + + +; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset( +; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]] +; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]] +define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { +bb: + %i2 = shl nsw i32 %i, 1 + %j1 = add nsw i32 %i, 1023 + %tmp = sext i32 %j1 to i64 + %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp + %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* + %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v11, i32 addrspace(1)* %tmp4, align 4 + + %j2 = add nsw i32 %i2, 1023 + %tmp5 = sext i32 %j2 to i64 + %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 + %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* + %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v22, i32 addrspace(1)* %tmp7, align 4 + + ret void +} + +; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset( +; CHECK: %j1 = add nsw i32 %i, 1024 +; CHECK: %tmp = sext i32 %j1 to i64 +; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp +; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 +define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { +bb: + %i2 = shl nsw i32 %i, 1 + %j1 = add nsw i32 %i, 1024 + %tmp = sext i32 %j1 to i64 + %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp + %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* + %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v11, i32 addrspace(1)* %tmp4, align 4 + + %j2 = add nsw i32 %i2, 1024 + %tmp5 = sext i32 %j2 to i64 + %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 + %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* + %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v22, i32 addrspace(1)* %tmp7, align 4 + + ret void +} + +; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset( +; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i +; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383 + +; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i +; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383 +define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { +bb: + %i2 = shl nsw i32 %i, 1 + %j1 = add nsw i32 %i, 16383 + %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 + %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* + %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 + %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v11, i32 addrspace(1)* %tmp4, align 4 + + %j2 = add nsw i32 %i2, 16383 + %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 + %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* + %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 + %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v22, i32 addrspace(1)* %tmp7, align 4 + + ret void +} + +; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset( +; CHECK: %j1 = add nsw i32 %i, 16384 +; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 +; CHECK: %j2 = add i32 %j1, %i +; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 +define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { +bb: + %i2 = shl nsw i32 %i, 1 + %j1 = add nsw i32 %i, 16384 + %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 + %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* + %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 + %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v11, i32 addrspace(1)* %tmp4, align 4 + + %j2 = add nsw i32 %i2, 16384 + %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 + %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* + %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 + %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* + store i32 %v22, i32 addrspace(1)* %tmp7, align 4 + + ret void +} diff --git a/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg b/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg deleted file mode 100644 index ad9ce2541ef..00000000000 --- a/test/Transforms/StraightLineStrengthReduce/R600/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'R600' in config.root.targets: - config.unsupported = True diff --git a/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll deleted file mode 100644 index 278250a9c80..00000000000 --- a/test/Transforms/StraightLineStrengthReduce/R600/reassociate-geps-and-slsr-addrspace.ll +++ /dev/null @@ -1,107 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s - -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" - - -; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset( -; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]] -; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]] -define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 1023 - %tmp = sext i32 %j1 to i64 - %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp - %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* - %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 1023 - %tmp5 = sext i32 %j2 to i64 - %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 - %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* - %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset( -; CHECK: %j1 = add nsw i32 %i, 1024 -; CHECK: %tmp = sext i32 %j1 to i64 -; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp -; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 -define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 1024 - %tmp = sext i32 %j1 to i64 - %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp - %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* - %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 1024 - %tmp5 = sext i32 %j2 to i64 - %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 - %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* - %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset( -; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i -; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383 - -; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i -; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383 -define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 16383 - %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 - %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* - %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 16383 - %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 - %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* - %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset( -; CHECK: %j1 = add nsw i32 %i, 16384 -; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 -; CHECK: %j2 = add i32 %j1, %i -; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 -define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 16384 - %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 - %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* - %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 16384 - %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 - %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* - %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -}