1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
8 ; FUNC-LABEL: {{^}}ngroups_x:
9 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
10 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
12 ; HSA: .amd_kernel_code_t
14 ; HSA: enable_sgpr_private_segment_buffer = 1
15 ; HSA: enable_sgpr_dispatch_ptr = 0
16 ; HSA: enable_sgpr_queue_ptr = 0
17 ; HSA: enable_sgpr_kernarg_segment_ptr = 1
18 ; HSA: enable_sgpr_dispatch_id = 0
19 ; HSA: enable_sgpr_flat_scratch_init = 0
20 ; HSA: enable_sgpr_private_segment_size = 0
21 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
22 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
23 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
25 ; HSA: .end_amd_kernel_code_t
28 ; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
29 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
30 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
32 define void @ngroups_x (i32 addrspace(1)* %out) {
34 %0 = call i32 @llvm.r600.read.ngroups.x() #0
35 store i32 %0, i32 addrspace(1)* %out
39 ; FUNC-LABEL: {{^}}ngroups_y:
40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
41 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
43 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
44 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
45 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
46 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
47 define void @ngroups_y (i32 addrspace(1)* %out) {
49 %0 = call i32 @llvm.r600.read.ngroups.y() #0
50 store i32 %0, i32 addrspace(1)* %out
54 ; FUNC-LABEL: {{^}}ngroups_z:
55 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
56 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
58 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
59 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
60 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
61 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
62 define void @ngroups_z (i32 addrspace(1)* %out) {
64 %0 = call i32 @llvm.r600.read.ngroups.z() #0
65 store i32 %0, i32 addrspace(1)* %out
69 ; FUNC-LABEL: {{^}}global_size_x:
70 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
71 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
73 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
74 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
75 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
76 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
77 define void @global_size_x (i32 addrspace(1)* %out) {
79 %0 = call i32 @llvm.r600.read.global.size.x() #0
80 store i32 %0, i32 addrspace(1)* %out
84 ; FUNC-LABEL: {{^}}global_size_y:
85 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
86 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
88 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
89 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
90 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
91 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
92 define void @global_size_y (i32 addrspace(1)* %out) {
94 %0 = call i32 @llvm.r600.read.global.size.y() #0
95 store i32 %0, i32 addrspace(1)* %out
99 ; FUNC-LABEL: {{^}}global_size_z:
100 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
101 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
103 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
104 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
105 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
106 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
107 define void @global_size_z (i32 addrspace(1)* %out) {
109 %0 = call i32 @llvm.r600.read.global.size.z() #0
110 store i32 %0, i32 addrspace(1)* %out
114 ; The tgid values are stored in sgprs offset by the number of user sgprs.
117 ; FUNC-LABEL: {{^}}tgid_x:
118 ; HSA: .amd_kernel_code_t
119 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
120 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
121 ; HSA: compute_pgm_rsrc2_tgid_y_en = 0
122 ; HSA: compute_pgm_rsrc2_tgid_z_en = 0
123 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
124 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
125 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
126 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
127 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
128 ; HSA: .end_amd_kernel_code_t
130 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
131 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
132 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
133 ; HSA: flat_store_dword [[VVAL]]
135 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
136 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
137 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
138 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
139 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
140 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
141 define void @tgid_x(i32 addrspace(1)* %out) {
143 %0 = call i32 @llvm.r600.read.tgid.x() #0
144 store i32 %0, i32 addrspace(1)* %out
148 ; FUNC-LABEL: {{^}}tgid_y:
149 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
150 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
151 ; HSA: compute_pgm_rsrc2_tgid_y_en = 1
152 ; HSA: compute_pgm_rsrc2_tgid_z_en = 0
153 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
154 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
155 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
156 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
157 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
158 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7
159 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
160 ; HSA: flat_store_dword [[VVAL]]
162 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
163 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
164 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
165 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
166 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
167 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
168 define void @tgid_y(i32 addrspace(1)* %out) {
170 %0 = call i32 @llvm.r600.read.tgid.y() #0
171 store i32 %0, i32 addrspace(1)* %out
175 ; FUNC-LABEL: {{^}}tgid_z:
176 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
177 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
178 ; HSA: compute_pgm_rsrc2_tgid_y_en = 0
179 ; HSA: compute_pgm_rsrc2_tgid_z_en = 1
180 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
181 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
182 ; HSA: enable_sgpr_private_segment_buffer = 1
183 ; HSA: enable_sgpr_dispatch_ptr = 0
184 ; HSA: enable_sgpr_queue_ptr = 0
185 ; HSA: enable_sgpr_kernarg_segment_ptr = 1
186 ; HSA: enable_sgpr_dispatch_id = 0
187 ; HSA: enable_sgpr_flat_scratch_init = 0
188 ; HSA: enable_sgpr_private_segment_size = 0
189 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
190 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
191 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
193 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
194 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
195 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
196 ; HSA: flat_store_dword [[VVAL]]
198 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
199 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
200 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
201 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
202 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
203 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
204 define void @tgid_z(i32 addrspace(1)* %out) {
206 %0 = call i32 @llvm.r600.read.tgid.z() #0
207 store i32 %0, i32 addrspace(1)* %out
211 ; GCN-NOHSA: .section .AMDGPU.config
212 ; GCN-NOHSA: .long 47180
213 ; GCN-NOHSA-NEXT: .long 132{{$}}
215 ; FUNC-LABEL: {{^}}tidig_x:
216 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
217 ; GCN-NOHSA: buffer_store_dword v0
218 ; HSA: flat_store_dword v0
219 define void @tidig_x(i32 addrspace(1)* %out) {
221 %0 = call i32 @llvm.r600.read.tidig.x() #0
222 store i32 %0, i32 addrspace(1)* %out
226 ; GCN-NOHSA: .section .AMDGPU.config
227 ; GCN-NOHSA: .long 47180
228 ; GCN-NOHSA-NEXT: .long 2180{{$}}
230 ; FUNC-LABEL: {{^}}tidig_y:
232 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
233 ; GCN-NOHSA: buffer_store_dword v1
234 ; HSA: flat_store_dword v1
235 define void @tidig_y(i32 addrspace(1)* %out) {
237 %0 = call i32 @llvm.r600.read.tidig.y() #0
238 store i32 %0, i32 addrspace(1)* %out
242 ; GCN-NOHSA: .section .AMDGPU.config
243 ; GCN-NOHSA: .long 47180
244 ; GCN-NOHSA-NEXT: .long 4228{{$}}
246 ; FUNC-LABEL: {{^}}tidig_z:
247 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
248 ; GCN-NOHSA: buffer_store_dword v2
249 ; HSA: flat_store_dword v2
250 define void @tidig_z(i32 addrspace(1)* %out) {
252 %0 = call i32 @llvm.r600.read.tidig.z() #0
253 store i32 %0, i32 addrspace(1)* %out
257 declare i32 @llvm.r600.read.ngroups.x() #0
258 declare i32 @llvm.r600.read.ngroups.y() #0
259 declare i32 @llvm.r600.read.ngroups.z() #0
261 declare i32 @llvm.r600.read.global.size.x() #0
262 declare i32 @llvm.r600.read.global.size.y() #0
263 declare i32 @llvm.r600.read.global.size.z() #0
265 declare i32 @llvm.r600.read.tgid.x() #0
266 declare i32 @llvm.r600.read.tgid.y() #0
267 declare i32 @llvm.r600.read.tgid.z() #0
269 declare i32 @llvm.r600.read.tidig.x() #0
270 declare i32 @llvm.r600.read.tidig.y() #0
271 declare i32 @llvm.r600.read.tidig.z() #0
273 declare i32 @llvm.AMDGPU.read.workdim() #0
275 attributes #0 = { readnone }