1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2 ; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4 ; FIXME: Enable for VI. The tonga run line above is spelled XUN instead of RUN so that lit skips it.
; Intrinsics referenced by the tests in this file.
; llvm.r600.read.tidig.x supplies the %tid value used by the later tests.
6 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Declared here but not called by any function visible in this file.
7 declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
; div.fmas takes three floating-point operands plus an i1 as its fourth
; argument; the tests below vary how that i1 is produced.
8 declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
9 declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
; Basic f32 case: %a, %b, %c and the i1 %d go straight from the function
; arguments into llvm.AMDGPU.div.fmas.f32 and the result is stored to %out.
; The checks expect three s_load_dword results to be copied into v registers
; and then used, in a/b/c order, as the sources of v_div_fmas_f32; the
; register it defines is the one passed to buffer_store_dword.
; The VI load offsets are four times the corresponding SI ones. The
; VI-prefixed patterns are only exercised once a VI run line is enabled.
11 ; GCN-LABEL: {{^}}test_div_fmas_f32:
12 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
13 ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
14 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
15 ; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
16 ; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
17 ; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
18 ; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
19 ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
20 ; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
21 ; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]]
22 ; GCN: buffer_store_dword [[RESULT]],
24 define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
25 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
26 store float %result, float addrspace(1)* %out, align 4
; f64 variant: calls llvm.AMDGPU.div.fmas.f64 on the double arguments %a, %b,
; %c with the i1 argument %d, and stores the result to %out with 8-byte
; alignment.
30 ; GCN-LABEL: {{^}}test_div_fmas_f64:
32 define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
33 %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
34 store double %result, double addrspace(1)* %out, align 8
; The i1 operand is produced by comparing the i32 argument %i against zero.
; The expected code writes that compare directly into vcc
; (v_cmp_eq_i32_e64 with vcc as its destination) ahead of v_div_fmas_f32.
38 ; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
39 ; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0
40 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
41 define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
42 %cmp = icmp eq i32 %i, 0
43 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
44 store float %result, float addrspace(1)* %out, align 4
; The i1 operand is the constant false. The expected code sets vcc to 0 with
; s_mov_b64 before issuing v_div_fmas_f32.
48 ; GCN-LABEL: {{^}}test_div_fmas_f32_imm_false_cond_to_vcc:
49 ; SI: s_mov_b64 vcc, 0
50 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
51 define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
52 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
53 store float %result, float addrspace(1)* %out, align 4
; The i1 operand is the constant true. The expected code sets vcc to -1 with
; s_mov_b64 before issuing v_div_fmas_f32.
57 ; GCN-LABEL: {{^}}test_div_fmas_f32_imm_true_cond_to_vcc:
58 ; SI: s_mov_b64 vcc, -1
59 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
60 define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
61 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
62 store float %result, float addrspace(1)* %out, align 4
; The i1 operand is the AND of two compares: %tid == 0, where %tid is the
; llvm.r600.read.tidig.x result, and %d != 0, where %d is a function argument.
; The checks expect each compare to land in its own 64-bit s register pair and
; s_and_b64 to combine the two into vcc ahead of v_div_fmas_f32.
; %a, %b and %c are loaded from three consecutive floats starting at
; %in[%tid], which lines up with the 0/4/8 byte offsets in the load checks.
; The result is stored two floats past %out.
; Note that the expected v_div_fmas_f32 source order is [[B]], [[A]], [[C]],
; not the a/b/c order of the call.
66 ; GCN-LABEL: {{^}}test_div_fmas_f32_logical_cond_to_vcc:
67 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
68 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
69 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
71 ; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
72 ; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
73 ; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
74 ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[B]], [[A]], [[C]]
76 define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
77 %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
78 %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
79 %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
80 %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
81 %gep.out = getelementptr float addrspace(1)* %out, i32 2
83 %a = load float addrspace(1)* %gep.a
84 %b = load float addrspace(1)* %gep.b
85 %c = load float addrspace(1)* %gep.c
87 %cmp0 = icmp eq i32 %tid, 0
88 %cmp1 = icmp ne i32 %d, 0
89 %and = and i1 %cmp0, %cmp1
91 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
92 store float %result, float addrspace(1)* %gep.out, align 4
; The i1 operand is a phi: false when control arrives from %entry, and %cmp1
; (the value loaded from %dummy compared != 0) when it arrives from %bb. The
; branch on %tid == 0 decides whether %bb runs.
; The checks expect the %tid compare to drive s_and_saveexec_b64 / s_xor_b64,
; the loaded-value compare to be turned into 0 / -1 by v_cndmask_b32, exec to
; be restored with s_or_b64, and a v_cmp_ne_i32 against 0 to rebuild vcc
; before v_div_fmas_f32.
; NOTE(review): the final compare pattern names v0 literally instead of
; capturing the v_cndmask_b32 destination -- confirm this is intentional.
96 ; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
97 ; SI: v_cmp_eq_i32_e64 [[CMPTID:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
98 ; SI: s_and_saveexec_b64 [[CMPTID]], [[CMPTID]]
99 ; SI: s_xor_b64 [[CMPTID]], exec, [[CMPTID]]
101 ; SI: buffer_load_dword [[LOAD:v[0-9]+]]
102 ; SI: v_cmp_ne_i32_e64 [[CMPLOAD:s\[[0-9]+:[0-9]+\]]], [[LOAD]], 0
103 ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, [[CMPLOAD]]
107 ; SI: s_or_b64 exec, exec, [[CMPTID]]
108 ; SI: v_cmp_ne_i32_e32 vcc, 0, v0
109 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
110 ; SI: buffer_store_dword
112 define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
114 %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
115 %gep.out = getelementptr float addrspace(1)* %out, i32 2
116 %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
117 %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
118 %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
120 %a = load float addrspace(1)* %gep.a
121 %b = load float addrspace(1)* %gep.b
122 %c = load float addrspace(1)* %gep.c
124 %cmp0 = icmp eq i32 %tid, 0
125 br i1 %cmp0, label %bb, label %exit
128 %val = load i32 addrspace(1)* %dummy
129 %cmp1 = icmp ne i32 %val, 0
133 %cond = phi i1 [false, %entry], [%cmp1, %bb]
134 %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
135 store float %result, float addrspace(1)* %gep.out, align 4