1 ; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
; A libm sqrtf call inside a simple counted loop. With the Accelerate vector
; library selected on the opt command line, the pattern below expects the
; vectorized output to contain a vsqrtf call on <4 x float>.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>

declare float @sqrtf(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = sqrtf(y[i]); } while (++i != n);
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @sqrtf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; A libm expf call inside a simple counted loop. The pattern below expects the
; vectorized output to contain a vexpf call on <4 x float>.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>

declare float @expf(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = expf(y[i]); } while (++i != n);
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @expf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; A libm logf call inside a simple counted loop. The pattern below expects the
; vectorized output to contain a vlogf call on <4 x float>.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>

declare float @logf(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = logf(y[i]); } while (++i != n);
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @logf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; For the abs operation we generate a vector intrinsic rather than a vector
; library call, as the intrinsic is cheaper than a lib call. The pattern below
; therefore only looks for "fabs" on <4 x float>.
;CHECK-LABEL: @fabs_f32(
;CHECK: fabs{{.*}}<4 x float>

declare float @fabsf(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = fabsf(y[i]); } while (++i != n);
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @fabsf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Test that we can vectorize an intrinsic into a vector call: the loop calls
; the llvm.exp.f32 intrinsic, and the pattern below expects a vexpf call on
; <4 x float> in the output.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>

declare float @llvm.exp.f32(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = llvm.exp.f32(y[i]); } while (++i != n);
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Test that we don't vectorize arbitrary functions: @foo is an ordinary
; external declaration, so the negative pattern below requires that no "foo"
; call on <4 x float> appears in the output for this function.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>

declare float @foo(float) nounwind readnone

; Equivalent scalar loop: if (n > 0) do { x[i] = foo(y[i]); } while (++i != n);
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @foo(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Test that we don't vectorize calls with the nobuiltin attribute: the sqrtf
; call site below is marked nobuiltin, and the negative pattern requires that
; no vsqrtf call on <4 x float> appears in the output for this function.
; @sqrtf itself is declared earlier in this file.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>

; Equivalent scalar loop: if (n > 0) do { x[i] = sqrtf(y[i]); } while (++i != n);
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; nobuiltin on the call site is the only difference from @sqrt_f32's call.
  %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the 64-bit induction variable, truncated, against the i32 bound.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}