; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
; (a+a)+(a+a) = 4*a: with unsafe-fp-math this folds to one multiply by a
; constant pool value (4.0), loaded RIP-relative.
define float @test1(float %a) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %r = fadd float %t1, %t1
  ret float %r
}
; 4*a + (a+a) = 6*a: constant multiply with the constant on the LHS of fmul;
; still combined into a single vmulss.
define float @test2(float %a) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float 4.0, %a
  %t2 = fadd float %a, %a
  %r = fadd float %t1, %t2
  ret float %r
}
; Same as test2 but with the constant on the RHS of the fmul (commuted operands);
; result must still be one vmulss.
define float @test3(float %a) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 4.0
  %t2 = fadd float %a, %a
  %r = fadd float %t1, %t2
  ret float %r
}
; (a+a) + 4*a = 6*a with the fadd computed first; operand order of the final
; fadd commuted relative to test2/test3.
define float @test4(float %a) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %t2 = fmul float 4.0, %a
  %r = fadd float %t1, %t2
  ret float %r
}
; (a+a) + a*4 = 6*a: last commutation variant of the add+mul pattern;
; still one constant multiply.
define float @test5(float %a) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fadd float %a, %a
  %t2 = fmul float %a, 4.0
  %r = fadd float %t1, %t2
  ret float %r
}
; 2*a - (a+a) = 0: under unsafe-fp-math the whole expression folds to +0.0,
; materialized with the vxorps zeroing idiom.
define float @test6(float %a) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float 2.0, %a
  %t2 = fadd float %a, %a
  %r = fsub float %t1, %t2
  ret float %r
}
; a*2 - (a+a) = 0: same as test6 with the fmul constant commuted to the RHS.
define float @test7(float %a) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 2.0
  %t2 = fadd float %a, %a
  %r = fsub float %t1, %t2
  ret float %r
}
; a + a*0 -> a: with unsafe-fp-math (no signed zeros / NaNs assumed) the
; multiply-by-zero and the add fold away; the argument is returned unchanged.
define float @test8(float %a) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %t1 = fmul float %a, 0.0
  %t2 = fadd float %a, %t1
  ret float %t2
}
; 0*a + a -> a: commuted variant of test8; everything folds away.
define float @test9(float %a) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %t1 = fmul float 0.0, %a
  %t2 = fadd float %t1, %a
  ret float %t2
}
; a + (-0.0 - a) = 0: the negate-and-add cancels; result is zeroed with vxorps.
define float @test10(float %a) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fsub float -0.0, %a
  %t2 = fadd float %a, %t1
  ret float %t2
}
; Same cancellation as test10 (a + (-a) = 0), kept as a separate test case.
define float @test11(float %a) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fsub float -0.0, %a
  %t2 = fadd float %a, %t1
  ret float %t2
}
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
; ((x0+x1)+x2)+x3: the serial chain is reassociated into two independent adds
; ((x0+x1) and (x2+x3)) feeding a final add, shortening the critical path.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}
; Same chain with the second add's operands commuted (x2 on the LHS);
; reassociation must still produce the same paired schedule.
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}
; Same chain with the third add's operands commuted (x3 on the LHS);
; the paired schedule is unchanged.
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}
; Both the second and third adds commuted; reassociation still pairs the
; operands into independent adds.
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}
; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
; Serial chain of seven fadds over x0..x7. The CHECK lines show partial
; reassociation: adjacent argument pairs (x2+x3, x4+x5, x6+x7) are computed
; independently and folded into the accumulator, not a fully balanced tree.
; NOTE(review): this function continues past the visible chunk (its final
; ret/closing brace are not shown here) — do not edit its body in isolation.
175 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
176 ; CHECK-LABEL: reassociate_adds5:
178 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
179 ; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
180 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
181 ; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
182 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
183 ; CHECK-NEXT: vaddss %xmm7, %xmm6, %xmm1
184 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
186 %t0 = fadd float %x0, %x1
187 %t1 = fadd float %t0, %x2
188 %t2 = fadd float %t1, %x3
189 %t3 = fadd float %t2, %x4
190 %t4 = fadd float %t3, %x5
191 %t5 = fadd float %t4, %x6
192 %t6 = fadd float %t5, %x7