1 ; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5 ; Check vectorization on an interleaved load group of factor 2 and an interleaved
6 ; store group of factor 2.
10 ; void test_array_load2_store2(int C, int D) {
11 ; for (int i = 0; i < 1024; i+=2) {
19 ; CHECK-LABEL: @test_array_load2_store2(
20 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
21 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
22 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23 ; CHECK: add nsw <4 x i32>
24 ; CHECK: mul nsw <4 x i32>
25 ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
26 ; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
28 @AB = common global [1024 x i32] zeroinitializer, align 4
29 @CD = common global [1024 x i32] zeroinitializer, align 4
; Factor-2 interleave test: the loop steps i by 2, loading the even/odd
; pair AB[i]/AB[i+1] and storing AB[i]+C to CD[i] and AB[i+1]*D to CD[i+1],
; so both the load group and the store group have interleave factor 2
; (matching the single wide load/store plus shuffles in the CHECK lines).
31 define void @test_array_load2_store2(i32 %C, i32 %D) {
35 for.body: ; preds = %for.body, %entry
36 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
37 %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
38 %tmp = load i32, i32* %arrayidx0, align 4
; i is always even here, so (i | 1) == i + 1 — the odd member of the pair.
39 %tmp1 = or i64 %indvars.iv, 1
40 %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
41 %tmp2 = load i32, i32* %arrayidx1, align 4
42 %add = add nsw i32 %tmp, %C
43 %mul = mul nsw i32 %tmp2, %D
44 %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
45 store i32 %add, i32* %arrayidx2, align 4
46 %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
47 store i32 %mul, i32* %arrayidx3, align 4
48 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
49 %cmp = icmp slt i64 %indvars.iv.next, 1024
50 br i1 %cmp, label %for.body, label %for.end
52 for.end: ; preds = %for.body
58 ; void test_struct_array_load3_store3() {
60 ; for (int i = 0; i < 1024; i++) {
70 ; CHECK-LABEL: @test_struct_array_load3_store3(
71 ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
72 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
73 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
74 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
75 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
76 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
77 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
78 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
79 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
80 ; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
81 ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
83 %struct.ST3 = type { i32, i32, i32 }
84 @A = common global [3072 x i32] zeroinitializer, align 4
85 @S = common global [1024 x %struct.ST3] zeroinitializer, align 4
; Factor-3 interleave test: three consecutive ints are loaded from @A
; through a pointer bumped by 3 each iteration, then stored (+1, +2, +3)
; into the x/y/z fields of @S[i], giving load and store groups of factor 3.
87 define void @test_struct_array_load3_store3() {
91 for.body: ; preds = %for.body, %entry
92 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; %ptr.016 walks @A in steps of 3 (updated to %incdec.ptr2 = ptr+3 below).
93 %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
94 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
95 %tmp = load i32, i32* %ptr.016, align 4
96 %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
97 %tmp1 = load i32, i32* %incdec.ptr, align 4
98 %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
99 %tmp2 = load i32, i32* %incdec.ptr1, align 4
100 %add = add nsw i32 %tmp, 1
101 %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
102 store i32 %add, i32* %x, align 4
103 %add3 = add nsw i32 %tmp1, 2
104 %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
105 store i32 %add3, i32* %y, align 4
106 %add6 = add nsw i32 %tmp2, 3
107 %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
108 store i32 %add6, i32* %z, align 4
109 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
110 %exitcond = icmp eq i64 %indvars.iv.next, 1024
111 br i1 %exitcond, label %for.end, label %for.body
113 for.end: ; preds = %for.body
117 ; Check vectorization on an interleaved load group of factor 4.
125 ; int test_struct_load4(struct ST4 *S) {
127 ; for (int i = 0; i < 1024; i++) {
136 ; CHECK-LABEL: @test_struct_load4(
137 ; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
138 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
139 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
140 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
141 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
142 ; CHECK: add nsw <4 x i32>
143 ; CHECK: sub <4 x i32>
144 ; CHECK: add nsw <4 x i32>
145 ; CHECK: sub <4 x i32>
147 %struct.ST4 = type { i32, i32, i32, i32 }
; Factor-4 load-only test: reads all four ST4 fields of S[i] and folds
; them into a running reduction r = r + x - y + z - w. There is no store
; group — only an interleaved load group of factor 4 (one <16 x i32> wide
; load de-interleaved by the four shuffles in the CHECK lines).
149 define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
153 for.body: ; preds = %for.body, %entry
154 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; %r.022 carries the reduction value across iterations.
155 %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
156 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
157 %tmp = load i32, i32* %x, align 4
158 %add = add nsw i32 %tmp, %r.022
159 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
160 %tmp1 = load i32, i32* %y, align 4
161 %sub = sub i32 %add, %tmp1
162 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
163 %tmp2 = load i32, i32* %z, align 4
164 %add5 = add nsw i32 %sub, %tmp2
165 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
166 %tmp3 = load i32, i32* %w, align 4
167 %sub8 = sub i32 %add5, %tmp3
168 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
169 %exitcond = icmp eq i64 %indvars.iv.next, 1024
170 br i1 %exitcond, label %for.end, label %for.body
172 for.end: ; preds = %for.body
176 ; Check vectorization on an interleaved store group of factor 4.
178 ; void test_struct_store4(int *A, struct ST4 *B) {
180 ; for (int i = 0; i < 1024; i++) {
189 ; CHECK-LABEL: @test_struct_store4(
190 ; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
191 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
192 ; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
193 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
194 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
195 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197 ; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
198 ; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
; Factor-4 store-only test: one scalar load stream from A feeds four
; stores per iteration — B[i] = { A[i]+1, A[i]<<1, A[i]+3, A[i]+4 } —
; forming an interleaved store group of factor 4 (one <16 x i32> wide
; store built by the concat/interleave shuffles in the CHECK lines).
200 define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
204 for.cond.cleanup: ; preds = %for.body
207 for.body: ; preds = %for.body, %entry
208 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
209 %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
210 %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
211 %tmp = load i32, i32* %ptr.024, align 4
212 %add = add nsw i32 %tmp, 1
213 %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
214 store i32 %add, i32* %x, align 4
; shl by 1 is the strength-reduced form of the source's "2 * A[i]".
215 %mul = shl nsw i32 %tmp, 1
216 %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
217 store i32 %mul, i32* %y, align 4
218 %add3 = add nsw i32 %tmp, 3
219 %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
220 store i32 %add3, i32* %z, align 4
221 %add6 = add nsw i32 %tmp, 4
222 %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
223 store i32 %add6, i32* %w, align 4
224 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
225 %exitcond = icmp eq i64 %indvars.iv.next, 1024
226 br i1 %exitcond, label %for.cond.cleanup, label %for.body
229 ; Check vectorization on a reverse interleaved load group of factor 2 and
230 ; a reverse interleaved store group of factor 2.
237 ; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
238 ; for (int i = 1023; i >= 0; i--) {
239 ; int a = A[i].x + i; // interleaved load of index 0
240 ; int b = A[i].y - i; // interleaved load of index 1
241 ; B[i].x = a; // interleaved store of index 0
242 ; B[i].y = b; // interleaved store of index 1
246 ; CHECK-LABEL: @test_reversed_load2_store2(
247 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
248 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
249 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
250 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
251 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
252 ; CHECK: add nsw <4 x i32>
253 ; CHECK: sub nsw <4 x i32>
254 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
255 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
256 ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
257 ; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
259 %struct.ST2 = type { i32, i32 }
; Reverse factor-2 test: i counts down from 1023, so besides the usual
; de-/re-interleaving the vectorizer must reverse each lane vector — the
; extra <i32 3, i32 2, i32 1, i32 0> shuffles in the CHECK lines above.
261 define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
265 for.cond.cleanup: ; preds = %for.body
268 for.body: ; preds = %for.body, %entry
; Induction variable starts at the last index and steps by -1.
269 %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
270 %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
271 %tmp = load i32, i32* %x, align 4
272 %tmp1 = trunc i64 %indvars.iv to i32
273 %add = add nsw i32 %tmp, %tmp1
274 %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
275 %tmp2 = load i32, i32* %y, align 4
276 %sub = sub nsw i32 %tmp2, %tmp1
277 %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
278 store i32 %add, i32* %x5, align 4
279 %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
280 store i32 %sub, i32* %y8, align 4
281 %indvars.iv.next = add nsw i64 %indvars.iv, -1
282 %cmp = icmp sgt i64 %indvars.iv, 0
283 br i1 %cmp, label %for.body, label %for.cond.cleanup
286 ; Check vectorization on an interleaved load group of factor 2 with 1 gap
287 ; (missing the load of odd elements).
289 ; void even_load(int *A, int *B) {
290 ; for (unsigned i = 0; i < 1024; i+=2)
294 ; CHECK-LABEL: @even_load(
295 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
296 ; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
297 ; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
298 ; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
; Gapped-group test: only the even elements of A are read (B[i/2] =
; A[i] * 2), so the factor-2 load group is missing its odd member; the
; CHECK-NOT above verifies no shuffle is emitted for the unused lanes.
300 define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
304 for.cond.cleanup: ; preds = %for.body
307 for.body: ; preds = %for.body, %entry
308 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
309 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
310 %tmp = load i32, i32* %arrayidx, align 4
311 %mul = shl nsw i32 %tmp, 1
; i is always even, so i >> 1 is exact — this is the i/2 store index into B.
312 %tmp1 = lshr exact i64 %indvars.iv, 1
313 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
314 store i32 %mul, i32* %arrayidx2, align 4
315 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
316 %cmp = icmp ult i64 %indvars.iv.next, 1024
317 br i1 %cmp, label %for.body, label %for.cond.cleanup
320 ; Check vectorization on interleaved access groups identified from mixed loads and stores.
322 ; void mixed_load2_store2(int *A, int *B) {
323 ; for (unsigned i = 0; i < 1024; i+=2) {
324 ; B[i] = A[i] * A[i+1];
325 ; B[i+1] = A[i] + A[i+1];
329 ; CHECK-LABEL: @mixed_load2_store2(
330 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
331 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
332 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
333 ; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
334 ; CHECK: store <8 x i32> %interleaved.vec
; Mixed factor-2 test: each of A[i]/A[i+1] is loaded twice, interleaved
; with the stores B[i] = A[i]*A[i+1] and B[i+1] = A[i]+A[i+1]; the CHECK
; lines expect a single %wide.vec load and one interleaved wide store.
336 define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
340 for.cond.cleanup: ; preds = %for.body
343 for.body: ; preds = %for.body, %entry
344 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
345 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
346 %tmp = load i32, i32* %arrayidx, align 4
; i is always even here, so (i | 1) == i + 1.
347 %tmp1 = or i64 %indvars.iv, 1
348 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
349 %tmp2 = load i32, i32* %arrayidx2, align 4
350 %mul = mul nsw i32 %tmp2, %tmp
351 %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
352 store i32 %mul, i32* %arrayidx4, align 4
; Second loads of the same two addresses (after the intervening store).
353 %tmp3 = load i32, i32* %arrayidx, align 4
354 %tmp4 = load i32, i32* %arrayidx2, align 4
355 %add10 = add nsw i32 %tmp4, %tmp3
356 %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
357 store i32 %add10, i32* %arrayidx13, align 4
358 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
359 %cmp = icmp ult i64 %indvars.iv.next, 1024
360 br i1 %cmp, label %for.body, label %for.cond.cleanup
363 ; Check vectorization on interleaved access groups identified from mixed loads and stores.
365 ; void mixed_load3_store3(int *A) {
366 ; for (unsigned i = 0; i < 1024; i++) {
373 ; CHECK-LABEL: @mixed_load3_store3(
374 ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
375 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
376 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
377 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
378 ; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
379 ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
; Mixed in-place factor-3 test: three consecutive ints at A are each
; read, incremented by i, and stored back to the same address, so the
; factor-3 load and store groups share the same pointers.
381 define void @mixed_load3_store3(i32* nocapture %A) {
385 for.cond.cleanup: ; preds = %for.body
388 for.body: ; preds = %for.body, %entry
389 %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; %A.addr.012 walks A in steps of 3 (updated to %incdec.ptr3 = ptr+3 below).
390 %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
391 %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
392 %tmp = load i32, i32* %A.addr.012, align 4
393 %add = add i32 %tmp, %i.013
394 store i32 %add, i32* %A.addr.012, align 4
395 %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
396 %tmp1 = load i32, i32* %incdec.ptr, align 4
397 %add2 = add i32 %tmp1, %i.013
398 store i32 %add2, i32* %incdec.ptr, align 4
399 %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
400 %tmp2 = load i32, i32* %incdec.ptr1, align 4
401 %add4 = add i32 %tmp2, %i.013
402 store i32 %add4, i32* %incdec.ptr1, align 4
403 %inc = add nuw nsw i32 %i.013, 1
404 %exitcond = icmp eq i32 %inc, 1024
405 br i1 %exitcond, label %for.cond.cleanup, label %for.body
408 ; Check vectorization on interleaved access groups with members having different
419 ; void int_float_struct(struct IntFloat *A) {
422 ; for (unsigned i = 0; i < 1024; i++) {
430 ; CHECK-LABEL: @int_float_struct(
431 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
432 ; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
433 ; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
434 ; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
435 ; CHECK: add nsw <4 x i32>
436 ; CHECK: fadd fast <4 x float>
438 %struct.IntFloat = type { i32, float }
440 @SA = common global i32 0, align 4
441 @SB = common global float 0.000000e+00, align 4
; Mixed-member-type test: %struct.IntFloat holds an i32 and a float, and
; each member feeds its own reduction (integer add / fast fadd). The
; "unsafe-fp-math" attribute (#0, defined below) permits vectorizing the
; FP reduction; the CHECK lines expect a bitcast of the float lanes.
443 define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
; Loop-exit block: publishes the final reduction values to the globals.
447 for.cond.cleanup: ; preds = %for.body
448 store i32 %add, i32* @SA, align 4
449 store float %add3, float* @SB, align 4
452 for.body: ; preds = %for.body, %entry
453 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
454 %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
455 %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
456 %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
457 %tmp = load i32, i32* %a, align 4
458 %add = add nsw i32 %tmp, %SumA.013
459 %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
460 %tmp1 = load float, float* %b, align 4
461 %add3 = fadd fast float %SumB.014, %tmp1
462 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
463 %exitcond = icmp eq i64 %indvars.iv.next, 1024
464 br i1 %exitcond, label %for.cond.cleanup, label %for.body
467 attributes #0 = { "unsafe-fp-math"="true" }