From: Matt Arsenault Date: Sun, 13 Jul 2014 02:46:17 +0000 (+0000) Subject: R600: Run more tests with promote alloca disabled. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=98c229953228a939cfd2c628eb0a1e704e522ca8;p=oota-llvm.git R600: Run more tests with promote alloca disabled. Re-run tests changed in r211110 to test both paths. Also fix broken check line. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212895 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index 3230353c36c..2ddc61f3972 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate @@ -9,13 +10,17 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; be 32-bits. ; SI-LABEL: @test_private_array_ptr_calc: + ; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] + +; SI-ALLOCA: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]] ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. It currently fails because it does not know how ; to interpret: ; getelementptr [4 x i32]* %alloca, i32 1, i32 %b -; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] + +; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 %tid = call i32 @llvm.SI.tid() readnone diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll index b127b7ede2e..1e23fd780ec 100644 --- a/test/CodeGen/R600/indirect-private-64.ll +++ b/test/CodeGen/R600/indirect-private-64.ll @@ -1,10 +1,18 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s + declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind ; SI-LABEL: @private_access_f64_alloca: -; SI: DS_WRITE_B64 -; SI: DS_READ_B64 + +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 + +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_READ_B64 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { %val = load double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 @@ -17,10 +25,16 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double } ; SI-LABEL: @private_access_v2f64_alloca: -; SI: DS_WRITE_B64 -; SI: DS_WRITE_B64 -; SI: DS_READ_B64 -; SI: DS_READ_B64 + +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 + +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_READ_B64 +; SI-PROMOTE: DS_READ_B64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 @@ -33,8 +47,14 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out } ; SI-LABEL: @private_access_i64_alloca: -; SI: DS_WRITE_B64 -; SI: DS_READ_B64 + +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 + +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_READ_B64 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { %val = load i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 @@ -47,10 +67,20 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs } ; SI-LABEL: @private_access_v2i64_alloca: -; SI: DS_WRITE_B64 -; SI: DS_WRITE_B64 -; SI: DS_READ_B64 -; SI: DS_READ_B64 + +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELD_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: V_MOVRELS_B32_e32 + +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_WRITE_B64 +; SI-PROMOTE: DS_READ_B64 +; SI-PROMOTE: DS_READ_B64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll index dee432664e8..e6f8ce8ef0e 100644 --- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll +++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll @@ -1,6 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s - ; XFAIL: * +; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s ; 64-bit select was originally lowered with a build_pair, and this ; could be simplified to 1 cndmask instead of 2, but that broken when @@ -16,8 +15,8 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) { ; FIXME: Fix truncating store for local memory ; SI-LABEL: @trunc_load_alloca_i64: -; SI: DS_READ_B32 -; SI-NOT: DS_READ_B64 +; SI: V_MOVRELS_B32 +; SI-NOT: V_MOVRELS_B32 ; SI: S_ENDPGM define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) { %idx = add i32 %a, %b diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll index 6543f6d0593..ec1995f6808 100644 --- a/test/CodeGen/R600/vector-alloca.ll +++ b/test/CodeGen/R600/vector-alloca.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: @vector_read ; EG: MOV @@ -53,7 +54,7 @@ entry: ; This test should be optimize to: ; store i32 0, i32 addrspace(1)* %out ; FUNC-LABEL: @bitcast_gep -; CHECK: STORE_RAW +; EG: STORE_RAW define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { entry: %0 = alloca [4 x i32]