-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@dd = common global [1024 x float] zeroinitializer, align 16
@dj = common global [1024 x i32] zeroinitializer, align 16
-;CHECK: @example1
+;CHECK-LABEL: @example1(
;CHECK: load <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL-LABEL: @example1(
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example1() nounwind uwtable ssp {
br label %1
ret void
}
-;CHECK: @example2
+;CHECK-LABEL: @example2(
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL-LABEL: @example2(
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph5, label %.preheader
ret void
}
-; We can't vectorize this loop because it has non constant loop bounds.
-;CHECK: @example3
-;CHECK-NOT: <4 x i32>
+;CHECK-LABEL: @example3(
+;CHECK: <4 x i32>
;CHECK: ret void
+;UNROLL-LABEL: @example3(
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: ret void
define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = icmp eq i32 %n, 0
br i1 %1, label %._crit_edge, label %.lr.ph
ret void
}
-;CHECK: @example4
+;CHECK-LABEL: @example4(
;CHECK: load <4 x i32>
;CHECK: ret void
+;UNROLL-LABEL: @example4(
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: ret void
define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = add nsw i32 %n, -1
%2 = icmp eq i32 %n, 0
ret void
}
-;CHECK: @example8
+;CHECK-LABEL: @example8(
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL-LABEL: @example8(
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example8(i32 %x) nounwind uwtable ssp {
br label %.preheader
ret void
}
-;CHECK: @example9
+;CHECK-LABEL: @example9(
;CHECK: phi <4 x i32>
;CHECK: ret i32
define i32 @example9() nounwind uwtable readonly ssp {
ret i32 %7
}
-;CHECK: @example10a
+;CHECK-LABEL: @example10a(
;CHECK: load <4 x i32>
;CHECK: add nsw <4 x i32>
;CHECK: load <4 x i16>
ret void
}
-;CHECK: @example10b
+;CHECK-LABEL: @example10b(
;CHECK: load <4 x i16>
;CHECK: sext <4 x i16>
;CHECK: store <4 x i32>
ret void
}
-;CHECK: @example11
+;CHECK-LABEL: @example11(
;CHECK: load i32
;CHECK: load i32
;CHECK: load i32
ret void
}
-;CHECK: @example12
-;CHECK: trunc <4 x i64>
+;CHECK-LABEL: @example12(
+;CHECK: trunc i64
;CHECK: store <4 x i32>
;CHECK: ret void
define void @example12() nounwind uwtable ssp {
}
; Can't vectorize because of reductions.
-;CHECK: @example13
+;CHECK-LABEL: @example13(
;CHECK-NOT: <4 x i32>
;CHECK: ret void
define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
ret void
}
-; Can't vectorize because of reductions.
-;CHECK: @example14
-;CHECK-NOT: <4 x i32>
+; Can vectorize.
+;CHECK-LABEL: @example14(
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
.preheader3:
ret void
}
-; Can't vectorize because the src and dst pointers are not disjoint.
-;CHECK: @example21
-;CHECK-NOT: <4 x i32>
+;CHECK-LABEL: @example21(
+;CHECK: load <4 x i32>
+;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
;CHECK: ret i32
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
%1 = icmp sgt i32 %n, 0
ret i32 %a.0.lcssa
}
-; Can't vectorize because there are multiple PHIs.
-;CHECK: @example23
-;CHECK-NOT: <4 x i32>
+;CHECK-LABEL: @example23(
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
br label %1
ret void
}
-;CHECK: @example24
+;CHECK-LABEL: @example24(
;CHECK: shufflevector <4 x i16>
;CHECK: ret void
define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
ret void
}
-;CHECK: @example25
+;CHECK-LABEL: @example25(
;CHECK: and <4 x i1>
;CHECK: zext <4 x i1>
;CHECK: ret void