1 ; RUN: opt < %s -instcombine -S | FileCheck %s
\r
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
\r
8 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
9 ; CHECK-LABEL: @sse2_psrai_w_0
\r
10 ; CHECK-NEXT: ret <8 x i16> %v
\r
11 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
\r
15 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
16 ; CHECK-LABEL: @sse2_psrai_w_15
\r
17 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
18 ; CHECK-NEXT: ret <8 x i16> %1
\r
19 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
\r
23 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
24 ; CHECK-LABEL: @sse2_psrai_w_64
\r
25 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
26 ; CHECK-NEXT: ret <8 x i16> %1
\r
27 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
\r
31 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
32 ; CHECK-LABEL: @sse2_psrai_d_0
\r
33 ; CHECK-NEXT: ret <4 x i32> %v
\r
34 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
\r
38 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
39 ; CHECK-LABEL: @sse2_psrai_d_15
\r
40 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
41 ; CHECK-NEXT: ret <4 x i32> %1
\r
42 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
\r
46 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
47 ; CHECK-LABEL: @sse2_psrai_d_64
\r
48 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
49 ; CHECK-NEXT: ret <4 x i32> %1
\r
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
\r
54 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
55 ; CHECK-LABEL: @avx2_psrai_w_0
\r
56 ; CHECK-NEXT: ret <16 x i16> %v
\r
57 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
\r
61 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
62 ; CHECK-LABEL: @avx2_psrai_w_15
\r
63 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
64 ; CHECK-NEXT: ret <16 x i16> %1
\r
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
\r
69 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
70 ; CHECK-LABEL: @avx2_psrai_w_64
\r
71 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
72 ; CHECK-NEXT: ret <16 x i16> %1
\r
73 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
\r
77 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
78 ; CHECK-LABEL: @avx2_psrai_d_0
\r
79 ; CHECK-NEXT: ret <8 x i32> %v
\r
80 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
\r
84 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
85 ; CHECK-LABEL: @avx2_psrai_d_15
\r
86 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
87 ; CHECK-NEXT: ret <8 x i32> %1
\r
88 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
\r
92 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
93 ; CHECK-LABEL: @avx2_psrai_d_64
\r
94 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
95 ; CHECK-NEXT: ret <8 x i32> %1
\r
96 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
\r
104 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
105 ; CHECK-LABEL: @sse2_psrli_w_0
\r
106 ; CHECK-NEXT: ret <8 x i16> %v
\r
107 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
\r
111 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
112 ; CHECK-LABEL: @sse2_psrli_w_15
\r
113 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
114 ; CHECK-NEXT: ret <8 x i16> %1
\r
115 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
\r
119 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
120 ; CHECK-LABEL: @sse2_psrli_w_64
\r
121 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
122 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
\r
126 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
127 ; CHECK-LABEL: @sse2_psrli_d_0
\r
128 ; CHECK-NEXT: ret <4 x i32> %v
\r
129 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
\r
133 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
134 ; CHECK-LABEL: @sse2_psrli_d_15
\r
135 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
136 ; CHECK-NEXT: ret <4 x i32> %1
\r
137 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
\r
141 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
142 ; CHECK-LABEL: @sse2_psrli_d_64
\r
143 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
144 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
\r
148 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
149 ; CHECK-LABEL: @sse2_psrli_q_0
\r
150 ; CHECK-NEXT: ret <2 x i64> %v
\r
151 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
\r
155 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
156 ; CHECK-LABEL: @sse2_psrli_q_15
\r
157 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
\r
158 ; CHECK-NEXT: ret <2 x i64> %1
\r
159 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
\r
163 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
164 ; CHECK-LABEL: @sse2_psrli_q_64
\r
165 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
166 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
\r
170 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
171 ; CHECK-LABEL: @avx2_psrli_w_0
\r
172 ; CHECK-NEXT: ret <16 x i16> %v
\r
173 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
\r
177 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
178 ; CHECK-LABEL: @avx2_psrli_w_15
\r
179 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
180 ; CHECK-NEXT: ret <16 x i16> %1
\r
181 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
\r
185 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
186 ; CHECK-LABEL: @avx2_psrli_w_64
\r
187 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
188 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
\r
192 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
193 ; CHECK-LABEL: @avx2_psrli_d_0
\r
194 ; CHECK-NEXT: ret <8 x i32> %v
\r
195 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
\r
199 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
200 ; CHECK-LABEL: @avx2_psrli_d_15
\r
201 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
202 ; CHECK-NEXT: ret <8 x i32> %1
\r
203 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
\r
207 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
208 ; CHECK-LABEL: @avx2_psrli_d_64
\r
209 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
210 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
\r
214 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
215 ; CHECK-LABEL: @avx2_psrli_q_0
\r
216 ; CHECK-NEXT: ret <4 x i64> %v
\r
217 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
\r
221 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
222 ; CHECK-LABEL: @avx2_psrli_q_15
\r
223 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
224 ; CHECK-NEXT: ret <4 x i64> %1
\r
225 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
\r
229 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
230 ; CHECK-LABEL: @avx2_psrli_q_64
\r
231 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
232 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
\r
240 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
241 ; CHECK-LABEL: @sse2_pslli_w_0
\r
242 ; CHECK-NEXT: ret <8 x i16> %v
\r
243 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
\r
247 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
248 ; CHECK-LABEL: @sse2_pslli_w_15
\r
249 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
250 ; CHECK-NEXT: ret <8 x i16> %1
\r
251 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
\r
255 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
256 ; CHECK-LABEL: @sse2_pslli_w_64
\r
257 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
258 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
\r
262 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
263 ; CHECK-LABEL: @sse2_pslli_d_0
\r
264 ; CHECK-NEXT: ret <4 x i32> %v
\r
265 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
\r
269 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
270 ; CHECK-LABEL: @sse2_pslli_d_15
\r
271 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
272 ; CHECK-NEXT: ret <4 x i32> %1
\r
273 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
\r
277 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
278 ; CHECK-LABEL: @sse2_pslli_d_64
\r
279 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
280 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
\r
284 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
285 ; CHECK-LABEL: @sse2_pslli_q_0
\r
286 ; CHECK-NEXT: ret <2 x i64> %v
\r
287 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
\r
291 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
292 ; CHECK-LABEL: @sse2_pslli_q_15
\r
293 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
\r
294 ; CHECK-NEXT: ret <2 x i64> %1
\r
295 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
\r
299 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
300 ; CHECK-LABEL: @sse2_pslli_q_64
\r
301 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
302 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
\r
306 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
307 ; CHECK-LABEL: @avx2_pslli_w_0
\r
308 ; CHECK-NEXT: ret <16 x i16> %v
\r
309 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
\r
313 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
314 ; CHECK-LABEL: @avx2_pslli_w_15
\r
315 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
316 ; CHECK-NEXT: ret <16 x i16> %1
\r
317 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
\r
321 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
322 ; CHECK-LABEL: @avx2_pslli_w_64
\r
323 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
324 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
\r
328 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
329 ; CHECK-LABEL: @avx2_pslli_d_0
\r
330 ; CHECK-NEXT: ret <8 x i32> %v
\r
331 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
\r
335 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
336 ; CHECK-LABEL: @avx2_pslli_d_15
\r
337 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
338 ; CHECK-NEXT: ret <8 x i32> %1
\r
339 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
\r
343 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
344 ; CHECK-LABEL: @avx2_pslli_d_64
\r
345 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
346 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
\r
350 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
351 ; CHECK-LABEL: @avx2_pslli_q_0
\r
352 ; CHECK-NEXT: ret <4 x i64> %v
\r
353 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
\r
357 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
358 ; CHECK-LABEL: @avx2_pslli_q_15
\r
359 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
360 ; CHECK-NEXT: ret <4 x i64> %1
\r
361 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
\r
365 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
366 ; CHECK-LABEL: @avx2_pslli_q_64
\r
367 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
368 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
\r
373 ; ASHR - Constant Vector (psra.w / psra.d called with a constant shift-count vector operand)
\r
376 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
377 ; CHECK-LABEL: @sse2_psra_w_0
\r
378 ; CHECK-NEXT: ret <8 x i16> %v
\r
379 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
383 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
384 ; CHECK-LABEL: @sse2_psra_w_15
\r
385 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
386 ; CHECK-NEXT: ret <8 x i16> %1
\r
387 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
391 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
392 ; CHECK-LABEL: @sse2_psra_w_15_splat
\r
393 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
394 ; CHECK-NEXT: ret <8 x i16> %1
\r
395 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
399 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
400 ; CHECK-LABEL: @sse2_psra_w_64
\r
401 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
402 ; CHECK-NEXT: ret <8 x i16> %1
\r
403 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
407 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
408 ; CHECK-LABEL: @sse2_psra_d_0
\r
409 ; CHECK-NEXT: ret <4 x i32> %v
\r
410 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
414 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
415 ; CHECK-LABEL: @sse2_psra_d_15
\r
416 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
417 ; CHECK-NEXT: ret <4 x i32> %1
\r
418 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
422 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
423 ; CHECK-LABEL: @sse2_psra_d_15_splat
\r
424 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
425 ; CHECK-NEXT: ret <4 x i32> %1
\r
426 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
430 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
431 ; CHECK-LABEL: @sse2_psra_d_64
\r
432 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
433 ; CHECK-NEXT: ret <4 x i32> %1
\r
434 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
438 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
439 ; CHECK-LABEL: @avx2_psra_w_0
\r
440 ; CHECK-NEXT: ret <16 x i16> %v
\r
441 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
445 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
446 ; CHECK-LABEL: @avx2_psra_w_15
\r
447 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
448 ; CHECK-NEXT: ret <16 x i16> %1
\r
449 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
453 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
454 ; CHECK-LABEL: @avx2_psra_w_15_splat
\r
455 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
456 ; CHECK-NEXT: ret <16 x i16> %1
\r
457 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
461 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
462 ; CHECK-LABEL: @avx2_psra_w_64
\r
463 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
464 ; CHECK-NEXT: ret <16 x i16> %1
\r
465 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
469 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
470 ; CHECK-LABEL: @avx2_psra_d_0
\r
471 ; CHECK-NEXT: ret <8 x i32> %v
\r
472 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
476 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
477 ; CHECK-LABEL: @avx2_psra_d_15
\r
478 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
479 ; CHECK-NEXT: ret <8 x i32> %1
\r
480 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
484 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
485 ; CHECK-LABEL: @avx2_psra_d_15_splat
\r
486 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
487 ; CHECK-NEXT: ret <8 x i32> %1
\r
488 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
492 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
493 ; CHECK-LABEL: @avx2_psra_d_64
\r
494 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
495 ; CHECK-NEXT: ret <8 x i32> %1
\r
496 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
501 ; LSHR - Constant Vector (psrl.w / psrl.d / psrl.q called with a constant shift-count vector operand)
\r
504 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
505 ; CHECK-LABEL: @sse2_psrl_w_0
\r
506 ; CHECK-NEXT: ret <8 x i16> %v
\r
507 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
511 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
512 ; CHECK-LABEL: @sse2_psrl_w_15
\r
513 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
514 ; CHECK-NEXT: ret <8 x i16> %1
\r
515 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
519 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
520 ; CHECK-LABEL: @sse2_psrl_w_15_splat
\r
521 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
522 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
526 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
527 ; CHECK-LABEL: @sse2_psrl_w_64
\r
528 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
529 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
533 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
534 ; CHECK-LABEL: @sse2_psrl_d_0
\r
535 ; CHECK-NEXT: ret <4 x i32> %v
\r
536 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
540 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
541 ; CHECK-LABEL: @sse2_psrl_d_15
\r
542 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
543 ; CHECK-NEXT: ret <4 x i32> %1
\r
544 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
548 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
549 ; CHECK-LABEL: @sse2_psrl_d_15_splat
\r
550 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
551 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
555 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
556 ; CHECK-LABEL: @sse2_psrl_d_64
\r
557 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
558 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
562 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
563 ; CHECK-LABEL: @sse2_psrl_q_0
\r
564 ; CHECK-NEXT: ret <2 x i64> %v
\r
565 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
\r
569 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
570 ; CHECK-LABEL: @sse2_psrl_q_15
\r
571 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
\r
572 ; CHECK-NEXT: ret <2 x i64> %1
\r
573 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
577 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
578 ; CHECK-LABEL: @sse2_psrl_q_64
\r
579 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
580 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
584 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
585 ; CHECK-LABEL: @avx2_psrl_w_0
\r
586 ; CHECK-NEXT: ret <16 x i16> %v
\r
587 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
591 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
592 ; CHECK-LABEL: @avx2_psrl_w_15
\r
593 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
594 ; CHECK-NEXT: ret <16 x i16> %1
\r
595 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
599 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
600 ; CHECK-LABEL: @avx2_psrl_w_15_splat
\r
601 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
602 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
606 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
607 ; CHECK-LABEL: @avx2_psrl_w_64
\r
608 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
609 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
613 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
614 ; CHECK-LABEL: @avx2_psrl_d_0
\r
615 ; CHECK-NEXT: ret <8 x i32> %v
\r
616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
620 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
621 ; CHECK-LABEL: @avx2_psrl_d_15
\r
622 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
623 ; CHECK-NEXT: ret <8 x i32> %1
\r
624 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
628 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
629 ; CHECK-LABEL: @avx2_psrl_d_15_splat
\r
630 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
631 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
635 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
636 ; CHECK-LABEL: @avx2_psrl_d_64
\r
637 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
638 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
642 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
643 ; CHECK-LABEL: @avx2_psrl_q_0
\r
644 ; CHECK-NEXT: ret <4 x i64> %v
\r
645 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
\r
649 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
650 ; CHECK-LABEL: @avx2_psrl_q_15
\r
651 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
652 ; CHECK-NEXT: ret <4 x i64> %1
\r
653 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
657 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
658 ; CHECK-LABEL: @avx2_psrl_q_64
\r
659 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
660 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
665 ; SHL - Constant Vector (psll.w / psll.d / psll.q called with a constant shift-count vector operand)
\r
668 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
669 ; CHECK-LABEL: @sse2_psll_w_0
\r
670 ; CHECK-NEXT: ret <8 x i16> %v
\r
671 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
675 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
676 ; CHECK-LABEL: @sse2_psll_w_15
\r
677 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
678 ; CHECK-NEXT: ret <8 x i16> %1
\r
679 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
683 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
684 ; CHECK-LABEL: @sse2_psll_w_15_splat
\r
685 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
686 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
690 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
691 ; CHECK-LABEL: @sse2_psll_w_64
\r
692 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
693 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
697 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
698 ; CHECK-LABEL: @sse2_psll_d_0
\r
699 ; CHECK-NEXT: ret <4 x i32> %v
\r
700 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
704 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
705 ; CHECK-LABEL: @sse2_psll_d_15
\r
706 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
707 ; CHECK-NEXT: ret <4 x i32> %1
\r
708 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
712 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
713 ; CHECK-LABEL: @sse2_psll_d_15_splat
\r
714 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
715 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
719 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
720 ; CHECK-LABEL: @sse2_psll_d_64
\r
721 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
722 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
726 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
727 ; CHECK-LABEL: @sse2_psll_q_0
\r
728 ; CHECK-NEXT: ret <2 x i64> %v
\r
729 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
\r
733 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
734 ; CHECK-LABEL: @sse2_psll_q_15
\r
735 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
\r
736 ; CHECK-NEXT: ret <2 x i64> %1
\r
737 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
741 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
742 ; CHECK-LABEL: @sse2_psll_q_64
\r
743 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
744 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
748 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
749 ; CHECK-LABEL: @avx2_psll_w_0
\r
750 ; CHECK-NEXT: ret <16 x i16> %v
\r
751 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
755 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
756 ; CHECK-LABEL: @avx2_psll_w_15
\r
757 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
758 ; CHECK-NEXT: ret <16 x i16> %1
\r
759 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
763 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
764 ; CHECK-LABEL: @avx2_psll_w_15_splat
\r
765 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
766 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
770 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
771 ; CHECK-LABEL: @avx2_psll_w_64
\r
772 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
773 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
777 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
778 ; CHECK-LABEL: @avx2_psll_d_0
\r
779 ; CHECK-NEXT: ret <8 x i32> %v
\r
780 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
784 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
785 ; CHECK-LABEL: @avx2_psll_d_15
\r
786 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
787 ; CHECK-NEXT: ret <8 x i32> %1
\r
788 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
; Same intrinsic as avx2_psll_d_15 but with 15 in every element of the count vector; the
; expected output folds the call to zeroinitializer.
; NOTE(review): presumably the low two i32 elements are read together as one 64-bit count that
; exceeds the element width - confirm against the PSLLD instruction definition.
792 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
793 ; CHECK-LABEL: @avx2_psll_d_15_splat
\r
794 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
795 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
; 8 x i32 shift-left with 64 in element 0 of the count vector (larger than the 32-bit element
; width); the expected output folds the call to zeroinitializer.
799 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
800 ; CHECK-LABEL: @avx2_psll_d_64
\r
801 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
802 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
; 4 x i64 shift-left by an all-zero count vector; the expected output returns %v unchanged.
806 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
807 ; CHECK-LABEL: @avx2_psll_q_0
\r
808 ; CHECK-NEXT: ret <4 x i64> %v
\r
809 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
\r
; 4 x i64 shift-left whose count vector is <15, 9999>. The expected output is a plain `shl` by 15
; on every lane, so the 9999 in element 1 is expected not to influence the shift amount.
813 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
814 ; CHECK-LABEL: @avx2_psll_q_15
\r
815 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
816 ; CHECK-NEXT: ret <4 x i64> %1
\r
817 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
; 4 x i64 shift-left with 64 in element 0 of the count vector (equal to the 64-bit element
; width); the expected output folds the call to zeroinitializer.
821 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
822 ; CHECK-LABEL: @avx2_psll_q_64
\r
823 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
824 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
829 ; Vector Demanded Bits
\r
; The shufflevector copies elements 0-3 of %a into both halves of the count vector. The expected
; output passes %a straight to psra.w, i.e. the shuffle is expected to be removed because it
; leaves the low four count elements unchanged.
832 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
833 ; CHECK-LABEL: @sse2_psra_w_var
\r
834 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
\r
835 ; CHECK-NEXT: ret <8 x i16> %1
\r
836 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
837 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
\r
; The shufflevector copies elements 0-1 of %a into both halves of the count vector. The expected
; output passes %a straight to psra.d, i.e. the shuffle is expected to be removed because it
; leaves the low two count elements unchanged.
841 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
842 ; CHECK-LABEL: @sse2_psra_d_var
\r
843 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
\r
844 ; CHECK-NEXT: ret <4 x i32> %1
\r
845 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
846 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
\r
; AVX2 variant of the psra.w demanded-elements test: the shufflevector copies elements 0-3 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
850 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
851 ; CHECK-LABEL: @avx2_psra_w_var
\r
852 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
\r
853 ; CHECK-NEXT: ret <16 x i16> %1
\r
854 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
855 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
\r
; AVX2 variant of the psra.d demanded-elements test: the shufflevector copies elements 0-1 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
859 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
860 ; CHECK-LABEL: @avx2_psra_d_var
\r
861 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
\r
862 ; CHECK-NEXT: ret <8 x i32> %1
\r
863 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
864 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
\r
; Logical-right-shift demanded-elements test: the shufflevector copies elements 0-3 of %a into
; both halves of the count vector, and the expected output passes %a straight to psrl.w with the
; shuffle removed.
868 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
869 ; CHECK-LABEL: @sse2_psrl_w_var
\r
870 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
\r
871 ; CHECK-NEXT: ret <8 x i16> %1
\r
872 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
873 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
\r
; Logical-right-shift demanded-elements test: the shufflevector copies elements 0-1 of %a into
; both halves of the count vector, and the expected output passes %a straight to psrl.d with the
; shuffle removed.
877 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
878 ; CHECK-LABEL: @sse2_psrl_d_var
\r
879 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
\r
880 ; CHECK-NEXT: ret <4 x i32> %1
\r
881 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
882 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
\r
; Logical-right-shift demanded-elements test: the shufflevector splats element 0 of %a across the
; count vector, and the expected output passes %a straight to psrl.q with the shuffle removed.
886 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
\r
887 ; CHECK-LABEL: @sse2_psrl_q_var
\r
888 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
\r
889 ; CHECK-NEXT: ret <2 x i64> %1
\r
890 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
\r
891 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
\r
; AVX2 variant of the psrl.w demanded-elements test: the shufflevector copies elements 0-3 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
895 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
896 ; CHECK-LABEL: @avx2_psrl_w_var
\r
897 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
\r
898 ; CHECK-NEXT: ret <16 x i16> %1
\r
899 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
900 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
\r
; AVX2 variant of the psrl.d demanded-elements test: the shufflevector copies elements 0-1 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
904 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
905 ; CHECK-LABEL: @avx2_psrl_d_var
\r
906 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
\r
907 ; CHECK-NEXT: ret <8 x i32> %1
\r
908 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
909 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
\r
; AVX2 variant of the psrl.q demanded-elements test: the shufflevector splats element 0 of %a
; across the count vector, and the expected output passes %a straight to the intrinsic with the
; shuffle removed.
913 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
\r
914 ; CHECK-LABEL: @avx2_psrl_q_var
\r
915 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
\r
916 ; CHECK-NEXT: ret <4 x i64> %1
\r
917 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
\r
918 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
\r
; Shift-left demanded-elements test: the shufflevector copies elements 0-3 of %a into both halves
; of the count vector, and the expected output passes %a straight to psll.w with the shuffle
; removed.
922 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
923 ; CHECK-LABEL: @sse2_psll_w_var
\r
924 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
\r
925 ; CHECK-NEXT: ret <8 x i16> %1
\r
926 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
927 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
\r
; Shift-left demanded-elements test: the shufflevector copies elements 0-1 of %a into both halves
; of the count vector, and the expected output passes %a straight to psll.d with the shuffle
; removed.
931 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
932 ; CHECK-LABEL: @sse2_psll_d_var
\r
933 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
\r
934 ; CHECK-NEXT: ret <4 x i32> %1
\r
935 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
936 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
\r
; Shift-left demanded-elements test: the shufflevector splats element 0 of %a across the count
; vector, and the expected output passes %a straight to psll.q with the shuffle removed.
940 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
\r
941 ; CHECK-LABEL: @sse2_psll_q_var
\r
942 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
\r
943 ; CHECK-NEXT: ret <2 x i64> %1
\r
944 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
\r
945 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
\r
; AVX2 variant of the psll.w demanded-elements test: the shufflevector copies elements 0-3 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
949 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
\r
950 ; CHECK-LABEL: @avx2_psll_w_var
\r
951 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
\r
952 ; CHECK-NEXT: ret <16 x i16> %1
\r
953 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
\r
954 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
\r
; AVX2 variant of the psll.d demanded-elements test: the shufflevector copies elements 0-1 of %a
; into both halves of the count vector, and the expected output passes %a straight to the
; intrinsic with the shuffle removed.
958 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
\r
959 ; CHECK-LABEL: @avx2_psll_d_var
\r
960 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
\r
961 ; CHECK-NEXT: ret <8 x i32> %1
\r
962 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
\r
963 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
\r
; AVX2 variant of the psll.q demanded-elements test: the shufflevector splats element 0 of %a
; across the count vector, and the expected output passes %a straight to the intrinsic with the
; shuffle removed.
967 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
\r
968 ; CHECK-LABEL: @avx2_psll_q_var
\r
969 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
\r
970 ; CHECK-NEXT: ret <4 x i64> %1
\r
971 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
\r
972 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
\r
; Chains psrai.w by 0, psra.w with a count vector whose low four elements are 0, and psrai.w by 0.
; The expected output returns %A unchanged; the 7 in element 4 of the count vector is expected
; not to affect the result.
980 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
\r
981 ; CHECK-LABEL: @test_sse2_psra_w_0
\r
982 ; CHECK-NEXT: ret <8 x i16> %A
\r
983 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
\r
984 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
985 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
\r
; Constant-folding test: a constant <2 x i64> is reinterpreted as <8 x i16> and arithmetically
; shifted right by 3 (psrai.w), by 3 (psra.w, element 0 of the count vector) and by 2 (psrai.w).
; The expected output is a single ret of a constant vector.
989 define <8 x i16> @test_sse2_psra_w_8() {
\r
990 ; CHECK-LABEL: @test_sse2_psra_w_8
\r
991 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
\r
992 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
\r
993 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
\r
994 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
995 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
\r
; Chains psrai.d by 0, psra.d with a count vector whose low two elements are 0, and psrai.d by 0.
; The expected output returns %A unchanged; the 7 in element 2 of the count vector is expected
; not to affect the result.
; The final psrai.d consumes %2 (not %1) so that the psra.d call is on the path to the result
; and is actually exercised, as in test_sse2_psra_w_0 and test_avx2_psra_d_0.
999 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
\r
1000 ; CHECK-LABEL: @test_sse2_psra_d_0
\r
1001 ; CHECK-NEXT: ret <4 x i32> %A
\r
1002 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
\r
1003 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
\r
1004 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
\r
; Constant-folding test: a constant <2 x i64> is reinterpreted as <4 x i32> and arithmetically
; shifted right by 3 (psrai.d), by 3 (psra.d, element 0 of the count vector) and by 2 (psrai.d).
; The expected output is a single ret of a constant vector.
; NOTE(review): the sibling tests in this group carry a test_ name prefix; this one does not.
1008 define <4 x i32> @sse2_psra_d_8() {
\r
1009 ; CHECK-LABEL: @sse2_psra_d_8
\r
1010 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
\r
1011 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
\r
1012 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
\r
1013 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
\r
1014 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
\r
; AVX2 variant: chains psrai.w by 0, psra.w with a count vector whose low four elements are 0,
; and psrai.w by 0. The expected output returns %A unchanged; the 7 in element 4 of the count
; vector is expected not to affect the result.
1018 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
\r
1019 ; CHECK-LABEL: @test_avx2_psra_w_0
\r
1020 ; CHECK-NEXT: ret <16 x i16> %A
\r
1021 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
\r
1022 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
1023 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
\r
; Constant-folding test: a constant <4 x i64> is reinterpreted as <16 x i16> and arithmetically
; shifted right by 3 (psrai.w), by 3 (psra.w, element 0 of the count vector) and by 2 (psrai.w).
; The expected output is a single ret of a constant vector.
; NOTE(review): the %A parameter is not referenced by any visible instruction in the body.
1027 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
\r
1028 ; CHECK-LABEL: @test_avx2_psra_w_8
\r
1029 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
\r
1030 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
\r
1031 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
\r
1032 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
1033 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
\r
; AVX2 variant: chains psrai.d by 0, psra.d with a count vector whose low two elements are 0,
; and psrai.d by 0. The expected output returns %A unchanged; the 7 in element 2 of the count
; vector is expected not to affect the result.
1037 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
\r
1038 ; CHECK-LABEL: @test_avx2_psra_d_0
\r
1039 ; CHECK-NEXT: ret <8 x i32> %A
\r
1040 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
\r
1041 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
\r
1042 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
\r
; Constant-folding test: a constant <4 x i64> is reinterpreted as <8 x i32> and arithmetically
; shifted right by 3 (psrai.d), by 3 (psra.d, element 0 of the count vector) and by 2 (psrai.d).
; The expected output is a single ret of a constant vector.
1046 define <8 x i32> @test_avx2_psra_d_8() {
\r
1047 ; CHECK-LABEL: @test_avx2_psra_d_8
\r
1048 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
\r
1049 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
\r
1050 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
\r
1051 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
\r
1052 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
\r
; Constant-folding chain over a constant <8 x i16> input: psll.w, psll.d and psll.q take the
; count operand %3 = <2 x i64> <1, 0> (bitcast to the matching element type), then pslli.w,
; pslli.d and pslli.q take the immediate %S = 1. The expected output is a constant ret.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1056 define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
\r
1057 %S = bitcast i32 1 to i32
\r
1058 %1 = zext i32 %S to i64
\r
1059 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1060 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1061 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1062 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
\r
1063 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1064 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1065 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
\r
1066 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1067 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
\r
1068 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1069 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
\r
1070 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1071 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
\r
1072 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1073 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
\r
1075 ; CHECK-LABEL: @test_sse2_1
\r
1076 ; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
\r
; AVX2 constant-folding chain over a constant <16 x i16> input: psll.w, psll.d and psll.q take
; the count operand %3 = <2 x i64> <1, 0> (bitcast to the matching element type), then pslli.w,
; pslli.d and pslli.q take the immediate %S = 1. The expected output is a constant ret.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1079 define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
\r
1080 %S = bitcast i32 1 to i32
\r
1081 %1 = zext i32 %S to i64
\r
1082 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1083 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1084 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1085 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1086 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1087 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1088 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
\r
1089 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1090 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
\r
1091 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1092 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
\r
1093 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1094 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
\r
1095 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1096 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
\r
1098 ; CHECK-LABEL: @test_avx2_1
\r
1099 ; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
\r
; Same shift-left chain as test_sse2_1 but with %S = 128, so every shift count is larger than the
; widest (64-bit) element; the expected output is a ret of zeroinitializer.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1102 define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
\r
1103 %S = bitcast i32 128 to i32
\r
1104 %1 = zext i32 %S to i64
\r
1105 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1106 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1107 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1108 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
\r
1109 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1110 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1111 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
\r
1112 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1113 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
\r
1114 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1115 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
\r
1116 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1117 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
\r
1118 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1119 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
\r
1121 ; CHECK-LABEL: @test_sse2_0
\r
1122 ; CHECK: ret <2 x i64> zeroinitializer
\r
; Same shift-left chain as test_avx2_1 but with %S = 128, so every shift count is larger than the
; widest (64-bit) element; the expected output is a ret of zeroinitializer.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1125 define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
\r
1126 %S = bitcast i32 128 to i32
\r
1127 %1 = zext i32 %S to i64
\r
1128 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1129 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1130 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1131 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1132 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1133 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1134 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
\r
1135 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1136 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
\r
1137 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1138 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
\r
1139 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1140 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
\r
1141 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1142 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
\r
1144 ; CHECK-LABEL: @test_avx2_0
\r
1145 ; CHECK: ret <4 x i64> zeroinitializer
\r
; Constant-folding chain over a constant <8 x i16> input: psrl.w, psrl.d and psrl.q take the
; count operand %3 = <2 x i64> <1, 0> (bitcast to the matching element type), then psrli.w,
; psrli.d and psrli.q take the immediate %S = 1. The expected output is a constant ret.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1147 define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
\r
1148 %S = bitcast i32 1 to i32
\r
1149 %1 = zext i32 %S to i64
\r
1150 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1151 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1152 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1153 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
\r
1154 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1155 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1156 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
\r
1157 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1158 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
\r
1159 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1160 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
\r
1161 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1162 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
\r
1163 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1164 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
\r
1166 ; CHECK-LABEL: @test_sse2_psrl_1
\r
1167 ; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
\r
; AVX2 constant-folding chain over a constant <16 x i16> input: psrl.w, psrl.d and psrl.q take
; the count operand %3 = <2 x i64> <1, 0> (bitcast to the matching element type), then psrli.w,
; psrli.d and psrli.q take the immediate %S = 1. The expected output is a constant ret.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1170 define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
\r
1171 %S = bitcast i32 1 to i32
\r
1172 %1 = zext i32 %S to i64
\r
1173 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1174 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1175 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1176 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1177 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1178 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1179 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
\r
1180 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1181 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
\r
1182 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1183 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
\r
1184 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1185 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
\r
1186 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1187 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
\r
1189 ; CHECK-LABEL: @test_avx2_psrl_1
\r
1190 ; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
\r
; Logical-right-shift chain (psrl.w/d/q then psrli.w/d/q) over a constant <8 x i16> input with
; %S = 128, so every shift count is larger than the widest (64-bit) element; the expected output
; is a ret of zeroinitializer.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match cannot drift into a later function.
1193 define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
\r
1194 %S = bitcast i32 128 to i32
\r
1195 %1 = zext i32 %S to i64
\r
1196 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1197 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1198 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1199 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
\r
1200 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1201 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1202 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
\r
1203 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1204 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
\r
1205 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1206 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
\r
1207 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1208 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
\r
1209 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1210 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
\r
1212 ; CHECK-LABEL: @test_sse2_psrl_0
\r
1213 ; CHECK: ret <2 x i64> zeroinitializer
\r
; AVX2 logical-right-shift chain (psrl.w/d/q then psrli.w/d/q) over a constant <16 x i16> input
; with %S = 128, so every shift count is larger than the widest (64-bit) element; the expected
; output is a ret of zeroinitializer.
; The function-name match below is a label directive (as in the rest of this file) so the ret
; match is anchored to this function.
1216 define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
\r
1217 %S = bitcast i32 128 to i32
\r
1218 %1 = zext i32 %S to i64
\r
1219 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1220 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1221 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1222 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1223 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1224 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1225 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
\r
1226 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1227 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
\r
1228 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1229 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
\r
1230 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1231 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
\r
1232 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1233 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
\r
1235 ; CHECK-LABEL: @test_avx2_psrl_0
\r
1236 ; CHECK: ret <4 x i64> zeroinitializer
\r
1239 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
\r
1240 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
\r
1241 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
\r
1242 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
\r
1243 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
\r
1244 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
\r
1245 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
\r
1246 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
\r
1247 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
\r
1248 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
\r
1249 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
\r
1250 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
\r
1252 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
\r
1253 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
\r
1254 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
\r
1255 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
\r
1256 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
\r
1257 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
\r
1258 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
\r
1259 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
\r
1260 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
\r
1261 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
\r
1262 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
\r
1263 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
\r
1265 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
\r
1266 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
\r
1267 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
\r
1268 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
\r
1269 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
\r
1270 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
\r
1271 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
\r
1272 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
\r
1274 attributes #1 = { nounwind readnone }
\r