; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone

; sext_in_reg of an i1: shl 31 / ashr 31 should select to a single S_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}
; sext_in_reg of an i8: shl 24 / ashr 24 should become S_SEXT_I32_I8.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; sext_in_reg of an i16: shl 16 / ashr 16 should become S_SEXT_I32_I16.
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; Same as the scalar i8 case, but through a <1 x i32> vector type.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}
; i1 sext_in_reg within an i64: low half is a BFE, high half is all sign bits (-1 or 0).
; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i8 sext_in_reg within an i64: low half via S_SEXT_I32_I8, high half from the sign.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i16 sext_in_reg within an i64: low half via S_SEXT_I32_I16, high half from the sign.
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i32 sext_in_reg within an i64: low half is the add result, high half is an ASHR by 31.
; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }
; Mismatched shift amounts (shl 6 / ashr 7) are not a sext_in_reg, so no BFE is formed.
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}
; Vector version of the mismatched-shift-amount case: per-element shl/ashr pairs, no BFE.
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-DAG: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI-DAG: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}
; v2i1 sext_in_reg: one S_BFE_I32 (width 1, offset 0) per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; v4i1 sext_in_reg: one S_BFE_I32 (width 1, offset 0) per element.
; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; v2i8 sext_in_reg: one S_SEXT_I32_I8 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; v4i8 sext_in_reg: one S_SEXT_I32_I8 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; v2i16 sext_in_reg: one S_SEXT_I32_I16 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; Regression test: i8 select/xor pattern that historically exposed sext_in_reg bugs.
; Only checks that compilation succeeds (FUNC-LABEL match).
; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; Same body as @testcase; kept as a second compilation-only regression test.
; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; With loaded (VGPR) operands the i8 sext_in_reg selects to V_BFE_I32 per element.
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; With loaded (VGPR) operands the i16 sext_in_reg selects to V_BFE_I32 (width 16).
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: BUFFER_LOAD_SBYTE
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; A zero-width BFE extracts nothing and should fold away.
; FUNC-LABEL: {{^}}bfe_0_width:
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; Two identical 8-bit BFEs should fold into one.
; FUNC-LABEL: {{^}}bfe_8_bfe_8:
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; An 8-bit BFE followed by a wider 16-bit BFE reduces to the narrow 8-bit one.
; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; This really should be folded into 1
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; BFE at a nonzero offset does not line up with the sext_in_reg, so no folding applies.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; The BFE is redundant after a signed byte load; only the sext load should remain.
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: BUFFER_LOAD_SBYTE
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; Zero-width BFE after a signed byte load; compilation-only check.
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; i1 sext_in_reg followed by a matching 1-bit BFE collapses to a single V_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; i2 sext_in_reg then a 1-bit BFE at offset 1 collapses to a single V_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; i2 sext_in_reg with an overlapping 2-bit BFE at offset 1: the shifts stay, plus a BFE.
; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}