; Test driver: runs opt with the loop vectorizer, then DCE and instcombine,
; forcing a vector width of 2 and an unroll factor of 1, and feeds the textual
; IR to FileCheck, which verifies it against the check lines in this file.
1 ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
; Module data layout: little-endian ("e") with 64-bit pointers ("p:64:64:64")
; and native integer widths of 8, 16, 32 and 64 bits ("n8:16:32:64").
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Input array read by the loops below: 1024 zero-initialized i32 elements,
; 16-byte aligned.
5 @A = common global [1024 x i32] zeroinitializer, align 16
9 ; Turn this into a max reduction. Make sure we use a splat to initialize the
10 ; vector for the reduction.
; The scalar loop keeps a running signed maximum over @A, seeded with %max.
; The checks expect %max to be broadcast into a <2 x i32> (insertelement into
; lane 0 followed by a zero-mask shufflevector) and the vector "icmp sgt" /
; select pair to appear twice in the output.
12 ; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
13 ; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
14 ; CHECK: icmp sgt <2 x i32>
15 ; CHECK: select <2 x i1>
17 ; CHECK: icmp sgt <2 x i32>
18 ; CHECK: select <2 x i1>
20 define i32 @max_red(i32 %max) {
25 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
26 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
27 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
28 %0 = load i32* %arrayidx, align 4
29 %cmp3 = icmp sgt i32 %0, %max.red.08 ; signed: loaded element > running maximum?
30 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the larger value
31 %indvars.iv.next = add i64 %indvars.iv, 1
32 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
33 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
34 br i1 %exitcond, label %for.end, label %for.body
40 ; Turn this into a max reduction. The compare has its operands swapped
41 ; (accumulator "slt" element); the select still picks the element when the
; compare is true, so this is still a signed max reduction.
; The checks expect a vector "icmp slt" / select pair first and a vector
; "icmp sgt" / select pair afterwards.
42 ; CHECK: @max_red_inverse_select
43 ; CHECK: icmp slt <2 x i32>
44 ; CHECK: select <2 x i1>
46 ; CHECK: icmp sgt <2 x i32>
47 ; CHECK: select <2 x i1>
49 define i32 @max_red_inverse_select(i32 %max) {
54 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
55 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
56 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
57 %0 = load i32* %arrayidx, align 4
58 %cmp3 = icmp slt i32 %max.red.08, %0 ; signed: running maximum < loaded element?
59 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the larger value
60 %indvars.iv.next = add i64 %indvars.iv, 1
61 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
62 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
63 br i1 %exitcond, label %for.end, label %for.body
69 ; Turn this into a min reduction.
; The scalar loop keeps a running signed minimum over @A, seeded with the
; argument (which is named %max even though it seeds a minimum). The checks
; expect the vector "icmp slt" / select pair to appear twice in the output.
71 ; CHECK: icmp slt <2 x i32>
72 ; CHECK: select <2 x i1>
74 ; CHECK: icmp slt <2 x i32>
75 ; CHECK: select <2 x i1>
77 define i32 @min_red(i32 %max) {
82 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
83 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
84 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
85 %0 = load i32* %arrayidx, align 4
86 %cmp3 = icmp slt i32 %0, %max.red.08 ; signed: loaded element < running minimum?
87 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the smaller value
88 %indvars.iv.next = add i64 %indvars.iv, 1
89 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
90 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
91 br i1 %exitcond, label %for.end, label %for.body
97 ; Turn this into a min reduction. The compare has its operands swapped
98 ; (accumulator "sgt" element); the select still picks the element when the
; compare is true, so this is still a signed min reduction.
; The checks expect a vector "icmp sgt" / select pair before the middle.block
; label and a vector "icmp slt" / select pair after it.
99 ; CHECK: @min_red_inverse_select
100 ; CHECK: icmp sgt <2 x i32>
101 ; CHECK: select <2 x i1>
102 ; CHECK: middle.block
103 ; CHECK: icmp slt <2 x i32>
104 ; CHECK: select <2 x i1>
106 define i32 @min_red_inverse_select(i32 %max) {
111 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
112 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
113 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
114 %0 = load i32* %arrayidx, align 4
115 %cmp3 = icmp sgt i32 %max.red.08, %0 ; signed: running minimum > loaded element?
116 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the smaller value
117 %indvars.iv.next = add i64 %indvars.iv, 1
118 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
119 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
120 br i1 %exitcond, label %for.end, label %for.body
128 ; Turn this into a max reduction.
; Unsigned variant: the scalar loop keeps a running unsigned maximum over @A,
; seeded with %max. The checks expect a vector "icmp ugt" / select pair both
; before and after the middle.block label.
130 ; CHECK: icmp ugt <2 x i32>
131 ; CHECK: select <2 x i1>
132 ; CHECK: middle.block
133 ; CHECK: icmp ugt <2 x i32>
134 ; CHECK: select <2 x i1>
136 define i32 @umax_red(i32 %max) {
141 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
142 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
143 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
144 %0 = load i32* %arrayidx, align 4
145 %cmp3 = icmp ugt i32 %0, %max.red.08 ; unsigned: loaded element > running maximum?
146 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the larger value
147 %indvars.iv.next = add i64 %indvars.iv, 1
148 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
149 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
150 br i1 %exitcond, label %for.end, label %for.body
156 ; Turn this into a max reduction. The compare has its operands swapped
157 ; (accumulator "ult" element); the select still picks the element when the
; compare is true, so this is still an unsigned max reduction.
; The checks expect a vector "icmp ult" / select pair before the middle.block
; label and a vector "icmp ugt" / select pair after it.
158 ; CHECK: @umax_red_inverse_select
159 ; CHECK: icmp ult <2 x i32>
160 ; CHECK: select <2 x i1>
161 ; CHECK: middle.block
162 ; CHECK: icmp ugt <2 x i32>
163 ; CHECK: select <2 x i1>
165 define i32 @umax_red_inverse_select(i32 %max) {
170 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
171 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
172 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
173 %0 = load i32* %arrayidx, align 4
174 %cmp3 = icmp ult i32 %max.red.08, %0 ; unsigned: running maximum < loaded element?
175 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the larger value
176 %indvars.iv.next = add i64 %indvars.iv, 1
177 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
178 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
179 br i1 %exitcond, label %for.end, label %for.body
185 ; Turn this into a min reduction.
; Unsigned variant: the scalar loop keeps a running unsigned minimum over @A,
; seeded with the argument (named %max even though it seeds a minimum). The
; checks expect a vector "icmp ult" / select pair both before and after the
; middle.block label.
187 ; CHECK: icmp ult <2 x i32>
188 ; CHECK: select <2 x i1>
189 ; CHECK: middle.block
190 ; CHECK: icmp ult <2 x i32>
191 ; CHECK: select <2 x i1>
193 define i32 @umin_red(i32 %max) {
198 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
199 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
200 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
201 %0 = load i32* %arrayidx, align 4
202 %cmp3 = icmp ult i32 %0, %max.red.08 ; unsigned: loaded element < running minimum?
203 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the smaller value
204 %indvars.iv.next = add i64 %indvars.iv, 1
205 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
206 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
207 br i1 %exitcond, label %for.end, label %for.body
213 ; Turn this into a min reduction. The compare has its operands swapped
214 ; (accumulator "ugt" element); the select still picks the element when the
; compare is true, so this is still an unsigned min reduction.
; The checks expect a vector "icmp ugt" / select pair before the middle.block
; label and a vector "icmp ult" / select pair after it.
215 ; CHECK: @umin_red_inverse_select
216 ; CHECK: icmp ugt <2 x i32>
217 ; CHECK: select <2 x i1>
218 ; CHECK: middle.block
219 ; CHECK: icmp ult <2 x i32>
220 ; CHECK: select <2 x i1>
222 define i32 @umin_red_inverse_select(i32 %max) {
227 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
228 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
229 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
230 %0 = load i32* %arrayidx, align 4
231 %cmp3 = icmp ugt i32 %max.red.08, %0 ; unsigned: running minimum > loaded element?
232 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 ; keep the smaller value
233 %indvars.iv.next = add i64 %indvars.iv, 1
234 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
235 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
236 br i1 %exitcond, label %for.end, label %for.body
243 ; Turn this into a min reduction (select inputs are reversed).
; The compare is "element sge accumulator" and the select returns the
; accumulator when it is true and the element otherwise, so the result is the
; signed minimum of the two. The checks expect a vector "icmp sge" / select
; pair before the middle.block label and a vector "icmp slt" / select pair
; after it.
244 ; CHECK: @sge_min_red
245 ; CHECK: icmp sge <2 x i32>
246 ; CHECK: select <2 x i1>
247 ; CHECK: middle.block
248 ; CHECK: icmp slt <2 x i32>
249 ; CHECK: select <2 x i1>
251 define i32 @sge_min_red(i32 %max) {
256 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
257 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
258 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
259 %0 = load i32* %arrayidx, align 4
260 %cmp3 = icmp sge i32 %0, %max.red.08 ; signed: loaded element >= running minimum?
261 %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 ; true keeps the accumulator, false takes the element
262 %indvars.iv.next = add i64 %indvars.iv, 1
263 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
264 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
265 br i1 %exitcond, label %for.end, label %for.body
272 ; Turn this into a max reduction (select inputs are reversed).
; Note: despite the "_min_red" suffix in the function name, this loop computes
; a signed maximum: the compare is "element sle accumulator" and the select
; returns the accumulator when it is true and the element otherwise. The
; checks expect a vector "icmp sle" / select pair before the middle.block
; label and a vector "icmp sgt" / select pair after it.
273 ; CHECK: @sle_min_red
274 ; CHECK: icmp sle <2 x i32>
275 ; CHECK: select <2 x i1>
276 ; CHECK: middle.block
277 ; CHECK: icmp sgt <2 x i32>
278 ; CHECK: select <2 x i1>
280 define i32 @sle_min_red(i32 %max) {
285 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
286 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
287 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
288 %0 = load i32* %arrayidx, align 4
289 %cmp3 = icmp sle i32 %0, %max.red.08 ; signed: loaded element <= running maximum?
290 %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 ; true keeps the accumulator, false takes the element
291 %indvars.iv.next = add i64 %indvars.iv, 1
292 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
293 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
294 br i1 %exitcond, label %for.end, label %for.body
301 ; Turn this into a min reduction (select inputs are reversed).
; Unsigned variant: the compare is "element uge accumulator" and the select
; returns the accumulator when it is true and the element otherwise, so the
; result is the unsigned minimum of the two. The checks expect a vector
; "icmp uge" / select pair before the middle.block label and a vector
; "icmp ult" / select pair after it.
302 ; CHECK: @uge_min_red
303 ; CHECK: icmp uge <2 x i32>
304 ; CHECK: select <2 x i1>
305 ; CHECK: middle.block
306 ; CHECK: icmp ult <2 x i32>
307 ; CHECK: select <2 x i1>
309 define i32 @uge_min_red(i32 %max) {
314 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
315 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running minimum, seeded with %max
316 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
317 %0 = load i32* %arrayidx, align 4
318 %cmp3 = icmp uge i32 %0, %max.red.08 ; unsigned: loaded element >= running minimum?
319 %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 ; true keeps the accumulator, false takes the element
320 %indvars.iv.next = add i64 %indvars.iv, 1
321 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
322 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
323 br i1 %exitcond, label %for.end, label %for.body
330 ; Turn this into a max reduction (select inputs are reversed).
; Note: despite the "_min_red" suffix in the function name, this loop computes
; an unsigned maximum: the compare is "element ule accumulator" and the select
; returns the accumulator when it is true and the element otherwise. The
; checks expect a vector "icmp ule" / select pair before the middle.block
; label and a vector "icmp ugt" / select pair after it.
331 ; CHECK: @ule_min_red
332 ; CHECK: icmp ule <2 x i32>
333 ; CHECK: select <2 x i1>
334 ; CHECK: middle.block
335 ; CHECK: icmp ugt <2 x i32>
336 ; CHECK: select <2 x i1>
338 define i32 @ule_min_red(i32 %max) {
343 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
344 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; running maximum, seeded with %max
345 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
346 %0 = load i32* %arrayidx, align 4
347 %cmp3 = icmp ule i32 %0, %max.red.08 ; unsigned: loaded element <= running maximum?
348 %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 ; true keeps the accumulator, false takes the element
349 %indvars.iv.next = add i64 %indvars.iv, 1
350 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
351 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
352 br i1 %exitcond, label %for.end, label %for.body
; Negative test: this loop must not be turned into a vector min/max reduction.
; The compare below is between two loaded values and does not involve the
; accumulator phi, so the select is not a min/max of the accumulator.
; The pattern includes the predicate: textual IR always prints a condition
; code between "icmp" and the operand type, so a pattern without one could
; never match and the negative check would be vacuous.
360 ; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
361 define i32 @no_red_1(i32 %max) {
366 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
367 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; loop-carried value, seeded with %max
368 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
369 %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv ; same element index, but with a leading index of 1
370 %0 = load i32* %arrayidx, align 4
371 %1 = load i32* %arrayidx1, align 4
372 %cmp3 = icmp sgt i32 %0, %1 ; compares the two loads, not the accumulator
373 %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
374 %indvars.iv.next = add i64 %indvars.iv, 1
375 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
376 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
377 br i1 %exitcond, label %for.end, label %for.body
; Negative test: this loop must not be turned into a vector min/max reduction.
; The compare involves the accumulator phi, but the select's false operand is
; a second loaded value rather than the accumulator, so the select is not a
; min/max of the compared pair.
; The pattern includes the predicate: textual IR always prints a condition
; code between "icmp" and the operand type, so a pattern without one could
; never match and the negative check would be vacuous.
384 ; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
385 define i32 @no_red_2(i32 %max) {
390 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; loop counter, starts at 0
391 %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] ; loop-carried value, seeded with %max
392 %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv ; address of @A[%indvars.iv]
393 %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv ; same element index, but with a leading index of 1
394 %0 = load i32* %arrayidx, align 4
395 %1 = load i32* %arrayidx1, align 4
396 %cmp3 = icmp sgt i32 %0, %max.red.08
397 %max.red.0 = select i1 %cmp3, i32 %0, i32 %1 ; false operand is %1, not the accumulator
398 %indvars.iv.next = add i64 %indvars.iv, 1
399 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
400 %exitcond = icmp eq i32 %lftr.wideiv, 1024 ; leave the loop after 1024 iterations
401 br i1 %exitcond, label %for.end, label %for.body