1 ; RUN: opt < %s -instcombine -S | FileCheck %s
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; SSE2 psrai.w: arithmetic right shift of <8 x i16> by an i32 immediate.
; Expected folds: a count of 0 returns %v unchanged, a count of 15 becomes a
; generic ashr by 15, and a count of 64 (larger than the 16-bit element) is
; also expected to become an ashr by 15.
8 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
9 ; CHECK-LABEL: @sse2_psrai_w_0
10 ; CHECK-NEXT: ret <8 x i16> %v
11 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
15 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
16 ; CHECK-LABEL: @sse2_psrai_w_15
17 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
18 ; CHECK-NEXT: ret <8 x i16> %1
19 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
23 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
24 ; CHECK-LABEL: @sse2_psrai_w_64
25 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
26 ; CHECK-NEXT: ret <8 x i16> %1
27 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
; SSE2 psrai.d: arithmetic right shift of <4 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an ashr by
; 15, and count 64 (larger than the 32-bit element) becomes an ashr by 31.
31 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
32 ; CHECK-LABEL: @sse2_psrai_d_0
33 ; CHECK-NEXT: ret <4 x i32> %v
34 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
38 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
39 ; CHECK-LABEL: @sse2_psrai_d_15
40 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
41 ; CHECK-NEXT: ret <4 x i32> %1
42 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
46 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
47 ; CHECK-LABEL: @sse2_psrai_d_64
48 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
49 ; CHECK-NEXT: ret <4 x i32> %1
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
; AVX2 psrai.w: arithmetic right shift of <16 x i16> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an ashr by
; 15, and count 64 (larger than the 16-bit element) also becomes an ashr by 15.
54 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
55 ; CHECK-LABEL: @avx2_psrai_w_0
56 ; CHECK-NEXT: ret <16 x i16> %v
57 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
61 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
62 ; CHECK-LABEL: @avx2_psrai_w_15
63 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
64 ; CHECK-NEXT: ret <16 x i16> %1
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
69 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
70 ; CHECK-LABEL: @avx2_psrai_w_64
71 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
72 ; CHECK-NEXT: ret <16 x i16> %1
73 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
; AVX2 psrai.d: arithmetic right shift of <8 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an ashr by
; 15, and count 64 (larger than the 32-bit element) becomes an ashr by 31.
77 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
78 ; CHECK-LABEL: @avx2_psrai_d_0
79 ; CHECK-NEXT: ret <8 x i32> %v
80 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
84 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
85 ; CHECK-LABEL: @avx2_psrai_d_15
86 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
87 ; CHECK-NEXT: ret <8 x i32> %1
88 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
92 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
93 ; CHECK-LABEL: @avx2_psrai_d_64
94 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
95 ; CHECK-NEXT: ret <8 x i32> %1
96 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
; SSE2 psrli.w: logical right shift of <8 x i16> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a generic
; lshr by 15, and count 64 (larger than the 16-bit element) folds to an
; all-zero vector.
104 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
105 ; CHECK-LABEL: @sse2_psrli_w_0
106 ; CHECK-NEXT: ret <8 x i16> %v
107 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
111 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
112 ; CHECK-LABEL: @sse2_psrli_w_15
113 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
114 ; CHECK-NEXT: ret <8 x i16> %1
115 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
119 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
120 ; CHECK-LABEL: @sse2_psrli_w_64
121 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
122 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
; SSE2 psrli.d: logical right shift of <4 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an lshr by
; 15, and count 64 (larger than the 32-bit element) folds to an all-zero vector.
126 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
127 ; CHECK-LABEL: @sse2_psrli_d_0
128 ; CHECK-NEXT: ret <4 x i32> %v
129 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
133 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
134 ; CHECK-LABEL: @sse2_psrli_d_15
135 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
136 ; CHECK-NEXT: ret <4 x i32> %1
137 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
141 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
142 ; CHECK-LABEL: @sse2_psrli_d_64
143 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
144 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
; SSE2 psrli.q: logical right shift of <2 x i64> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an lshr by
; 15, and count 64 (equal to the 64-bit element width) folds to an all-zero
; vector.
148 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
149 ; CHECK-LABEL: @sse2_psrli_q_0
150 ; CHECK-NEXT: ret <2 x i64> %v
151 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
155 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
156 ; CHECK-LABEL: @sse2_psrli_q_15
157 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
158 ; CHECK-NEXT: ret <2 x i64> %1
159 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
163 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
164 ; CHECK-LABEL: @sse2_psrli_q_64
165 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
166 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
; AVX2 psrli.w: logical right shift of <16 x i16> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an lshr by
; 15, and count 64 (larger than the 16-bit element) folds to an all-zero vector.
170 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
171 ; CHECK-LABEL: @avx2_psrli_w_0
172 ; CHECK-NEXT: ret <16 x i16> %v
173 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
177 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
178 ; CHECK-LABEL: @avx2_psrli_w_15
179 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
180 ; CHECK-NEXT: ret <16 x i16> %1
181 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
185 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
186 ; CHECK-LABEL: @avx2_psrli_w_64
187 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
188 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
; AVX2 psrli.d: logical right shift of <8 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an lshr by
; 15, and count 64 (larger than the 32-bit element) folds to an all-zero vector.
192 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
193 ; CHECK-LABEL: @avx2_psrli_d_0
194 ; CHECK-NEXT: ret <8 x i32> %v
195 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
199 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
200 ; CHECK-LABEL: @avx2_psrli_d_15
201 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
202 ; CHECK-NEXT: ret <8 x i32> %1
203 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
207 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
208 ; CHECK-LABEL: @avx2_psrli_d_64
209 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
210 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
; AVX2 psrli.q: logical right shift of <4 x i64> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes an lshr by
; 15, and count 64 (equal to the 64-bit element width) folds to an all-zero
; vector.
214 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
215 ; CHECK-LABEL: @avx2_psrli_q_0
216 ; CHECK-NEXT: ret <4 x i64> %v
217 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
221 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
222 ; CHECK-LABEL: @avx2_psrli_q_15
223 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
224 ; CHECK-NEXT: ret <4 x i64> %1
225 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
229 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
230 ; CHECK-LABEL: @avx2_psrli_q_64
231 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
232 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
; SSE2 pslli.w: left shift of <8 x i16> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a generic
; shl by 15, and count 64 (larger than the 16-bit element) folds to an
; all-zero vector.
240 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
241 ; CHECK-LABEL: @sse2_pslli_w_0
242 ; CHECK-NEXT: ret <8 x i16> %v
243 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
247 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
248 ; CHECK-LABEL: @sse2_pslli_w_15
249 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
250 ; CHECK-NEXT: ret <8 x i16> %1
251 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
255 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
256 ; CHECK-LABEL: @sse2_pslli_w_64
257 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
258 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
; SSE2 pslli.d: left shift of <4 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a shl by 15,
; and count 64 (larger than the 32-bit element) folds to an all-zero vector.
262 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
263 ; CHECK-LABEL: @sse2_pslli_d_0
264 ; CHECK-NEXT: ret <4 x i32> %v
265 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
269 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
270 ; CHECK-LABEL: @sse2_pslli_d_15
271 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
272 ; CHECK-NEXT: ret <4 x i32> %1
273 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
277 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
278 ; CHECK-LABEL: @sse2_pslli_d_64
279 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
280 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
; SSE2 pslli.q: left shift of <2 x i64> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a shl by 15,
; and count 64 (equal to the 64-bit element width) folds to an all-zero vector.
284 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
285 ; CHECK-LABEL: @sse2_pslli_q_0
286 ; CHECK-NEXT: ret <2 x i64> %v
287 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
291 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
292 ; CHECK-LABEL: @sse2_pslli_q_15
293 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
294 ; CHECK-NEXT: ret <2 x i64> %1
295 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
299 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
300 ; CHECK-LABEL: @sse2_pslli_q_64
301 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
302 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
; AVX2 pslli.w: left shift of <16 x i16> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a shl by 15,
; and count 64 (larger than the 16-bit element) folds to an all-zero vector.
306 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
307 ; CHECK-LABEL: @avx2_pslli_w_0
308 ; CHECK-NEXT: ret <16 x i16> %v
309 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
313 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
314 ; CHECK-LABEL: @avx2_pslli_w_15
315 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
316 ; CHECK-NEXT: ret <16 x i16> %1
317 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
321 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
322 ; CHECK-LABEL: @avx2_pslli_w_64
323 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
324 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
; AVX2 pslli.d: left shift of <8 x i32> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a shl by 15,
; and count 64 (larger than the 32-bit element) folds to an all-zero vector.
328 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
329 ; CHECK-LABEL: @avx2_pslli_d_0
330 ; CHECK-NEXT: ret <8 x i32> %v
331 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
335 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
336 ; CHECK-LABEL: @avx2_pslli_d_15
337 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
338 ; CHECK-NEXT: ret <8 x i32> %1
339 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
343 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
344 ; CHECK-LABEL: @avx2_pslli_d_64
345 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
346 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
; AVX2 pslli.q: left shift of <4 x i64> by an i32 immediate.
; Expected folds: count 0 returns %v unchanged, count 15 becomes a shl by 15,
; and count 64 (equal to the 64-bit element width) folds to an all-zero vector.
350 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
351 ; CHECK-LABEL: @avx2_pslli_q_0
352 ; CHECK-NEXT: ret <4 x i64> %v
353 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
357 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
358 ; CHECK-LABEL: @avx2_pslli_q_15
359 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
360 ; CHECK-NEXT: ret <4 x i64> %1
361 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
365 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
366 ; CHECK-LABEL: @avx2_pslli_q_64
367 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
368 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
373 ; ASHR - Constant Vector
; SSE2 psra.w: arithmetic right shift of <8 x i16> where the count is a
; constant <8 x i16> vector.
; In the _15 and _64 cases only element 0 holds the count; elements 1-3 are 0
; and elements 4-7 are 9999, yet the expected shift amount follows element 0
; alone (15 stays 15, 64 becomes 15).
; NOTE(review): presumably the count is read as the low 64 bits of the operand,
; which would also explain why the all-15 splat is expected to give a shift
; by 15 (an oversized 64-bit count) - confirm against the ISA reference.
376 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
377 ; CHECK-LABEL: @sse2_psra_w_0
378 ; CHECK-NEXT: ret <8 x i16> %v
379 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
383 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
384 ; CHECK-LABEL: @sse2_psra_w_15
385 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
386 ; CHECK-NEXT: ret <8 x i16> %1
387 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
391 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
392 ; CHECK-LABEL: @sse2_psra_w_15_splat
393 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
394 ; CHECK-NEXT: ret <8 x i16> %1
395 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
399 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
400 ; CHECK-LABEL: @sse2_psra_w_64
401 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
402 ; CHECK-NEXT: ret <8 x i16> %1
403 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; SSE2 psra.d: arithmetic right shift of <4 x i32> where the count is a
; constant <4 x i32> vector.
; In the _15 and _64 cases element 0 holds the count, element 1 is 0 and
; elements 2-3 are 9999; the expected amount follows element 0 alone
; (15 stays 15, 64 becomes 31).
; The all-15 splat is expected to shift by 31, not 15.
; NOTE(review): presumably because elements 0 and 1 together form one 64-bit
; count that exceeds the element width - confirm against the ISA reference.
407 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
408 ; CHECK-LABEL: @sse2_psra_d_0
409 ; CHECK-NEXT: ret <4 x i32> %v
410 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
414 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
415 ; CHECK-LABEL: @sse2_psra_d_15
416 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
417 ; CHECK-NEXT: ret <4 x i32> %1
418 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
422 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
423 ; CHECK-LABEL: @sse2_psra_d_15_splat
424 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
425 ; CHECK-NEXT: ret <4 x i32> %1
426 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
430 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
431 ; CHECK-LABEL: @sse2_psra_d_64
432 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
433 ; CHECK-NEXT: ret <4 x i32> %1
434 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX2 psra.w: arithmetic right shift of <16 x i16>; the count operand is a
; constant <8 x i16> vector (half the width of the shifted value).
; In the _15 and _64 cases only element 0 holds the count; elements 1-3 are 0
; and elements 4-7 are 9999, yet the expected amount follows element 0 alone
; (15 stays 15, 64 becomes 15). The all-15 splat is also expected to shift by 15.
; NOTE(review): presumably the count is the low 64 bits of the operand -
; confirm against the ISA reference.
438 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
439 ; CHECK-LABEL: @avx2_psra_w_0
440 ; CHECK-NEXT: ret <16 x i16> %v
441 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
445 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
446 ; CHECK-LABEL: @avx2_psra_w_15
447 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
448 ; CHECK-NEXT: ret <16 x i16> %1
449 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
453 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
454 ; CHECK-LABEL: @avx2_psra_w_15_splat
455 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
456 ; CHECK-NEXT: ret <16 x i16> %1
457 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
461 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
462 ; CHECK-LABEL: @avx2_psra_w_64
463 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
464 ; CHECK-NEXT: ret <16 x i16> %1
465 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX2 psra.d: arithmetic right shift of <8 x i32>; the count operand is a
; constant <4 x i32> vector.
; In the _15 and _64 cases element 0 holds the count, element 1 is 0 and
; elements 2-3 are 9999; the expected amount follows element 0 alone
; (15 stays 15, 64 becomes 31). The all-15 splat is expected to shift by 31.
; NOTE(review): presumably elements 0 and 1 form one 64-bit count - confirm
; against the ISA reference.
469 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
470 ; CHECK-LABEL: @avx2_psra_d_0
471 ; CHECK-NEXT: ret <8 x i32> %v
472 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
476 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
477 ; CHECK-LABEL: @avx2_psra_d_15
478 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
479 ; CHECK-NEXT: ret <8 x i32> %1
480 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
484 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
485 ; CHECK-LABEL: @avx2_psra_d_15_splat
486 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
487 ; CHECK-NEXT: ret <8 x i32> %1
488 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
492 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
493 ; CHECK-LABEL: @avx2_psra_d_64
494 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
495 ; CHECK-NEXT: ret <8 x i32> %1
496 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
501 ; LSHR - Constant Vector
; SSE2 psrl.w: logical right shift of <8 x i16> where the count is a
; constant <8 x i16> vector.
; With the count only in element 0 (elements 1-3 zero, elements 4-7 set to
; 9999), 15 is expected to become an lshr by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably the low four elements together form one 64-bit
; count, which is out of range for the splat - confirm against the ISA reference.
504 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
505 ; CHECK-LABEL: @sse2_psrl_w_0
506 ; CHECK-NEXT: ret <8 x i16> %v
507 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
511 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
512 ; CHECK-LABEL: @sse2_psrl_w_15
513 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
514 ; CHECK-NEXT: ret <8 x i16> %1
515 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
519 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
520 ; CHECK-LABEL: @sse2_psrl_w_15_splat
521 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
522 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
526 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
527 ; CHECK-LABEL: @sse2_psrl_w_64
528 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
529 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; SSE2 psrl.d: logical right shift of <4 x i32> where the count is a
; constant <4 x i32> vector.
; With the count only in element 0 (element 1 zero, elements 2-3 set to 9999),
; 15 is expected to become an lshr by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably elements 0 and 1 form one 64-bit count - confirm
; against the ISA reference.
533 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
534 ; CHECK-LABEL: @sse2_psrl_d_0
535 ; CHECK-NEXT: ret <4 x i32> %v
536 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
540 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
541 ; CHECK-LABEL: @sse2_psrl_d_15
542 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
543 ; CHECK-NEXT: ret <4 x i32> %1
544 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
548 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
549 ; CHECK-LABEL: @sse2_psrl_d_15_splat
550 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
551 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
555 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
556 ; CHECK-LABEL: @sse2_psrl_d_64
557 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
558 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; SSE2 psrl.q: logical right shift of <2 x i64> where the count is a
; constant <2 x i64> vector.
; Element 0 holds the count and element 1 is 9999 in the _15 and _64 cases;
; the expected result follows element 0 alone (15 becomes an lshr by 15,
; 64 folds to an all-zero vector).
562 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
563 ; CHECK-LABEL: @sse2_psrl_q_0
564 ; CHECK-NEXT: ret <2 x i64> %v
565 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
569 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
570 ; CHECK-LABEL: @sse2_psrl_q_15
571 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
572 ; CHECK-NEXT: ret <2 x i64> %1
573 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
577 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
578 ; CHECK-LABEL: @sse2_psrl_q_64
579 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
580 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; AVX2 psrl.w: logical right shift of <16 x i16>; the count operand is a
; constant <8 x i16> vector.
; With the count only in element 0 (elements 1-3 zero, elements 4-7 set to
; 9999), 15 is expected to become an lshr by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably the low four elements form one 64-bit count -
; confirm against the ISA reference.
584 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
585 ; CHECK-LABEL: @avx2_psrl_w_0
586 ; CHECK-NEXT: ret <16 x i16> %v
587 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
591 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
592 ; CHECK-LABEL: @avx2_psrl_w_15
593 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
594 ; CHECK-NEXT: ret <16 x i16> %1
595 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
599 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
600 ; CHECK-LABEL: @avx2_psrl_w_15_splat
601 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
602 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
606 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
607 ; CHECK-LABEL: @avx2_psrl_w_64
608 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
609 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX2 psrl.d: logical right shift of <8 x i32>; the count operand is a
; constant <4 x i32> vector.
; With the count only in element 0 (element 1 zero, elements 2-3 set to 9999),
; 15 is expected to become an lshr by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably elements 0 and 1 form one 64-bit count - confirm
; against the ISA reference.
613 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
614 ; CHECK-LABEL: @avx2_psrl_d_0
615 ; CHECK-NEXT: ret <8 x i32> %v
616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
620 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
621 ; CHECK-LABEL: @avx2_psrl_d_15
622 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
623 ; CHECK-NEXT: ret <8 x i32> %1
624 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
628 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
629 ; CHECK-LABEL: @avx2_psrl_d_15_splat
630 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
631 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
635 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
636 ; CHECK-LABEL: @avx2_psrl_d_64
637 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
638 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX2 psrl.q: logical right shift of <4 x i64>; the count operand is a
; constant <2 x i64> vector.
; Element 0 holds the count and element 1 is 9999 in the _15 and _64 cases;
; the expected result follows element 0 alone (15 becomes an lshr by 15,
; 64 folds to an all-zero vector).
642 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
643 ; CHECK-LABEL: @avx2_psrl_q_0
644 ; CHECK-NEXT: ret <4 x i64> %v
645 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
649 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
650 ; CHECK-LABEL: @avx2_psrl_q_15
651 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
652 ; CHECK-NEXT: ret <4 x i64> %1
653 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
657 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
658 ; CHECK-LABEL: @avx2_psrl_q_64
659 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
660 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
665 ; SHL - Constant Vector
; SSE2 psll.w: left shift of <8 x i16> where the count is a constant
; <8 x i16> vector.
; With the count only in element 0 (elements 1-3 zero, elements 4-7 set to
; 9999), 15 is expected to become a shl by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably the low four elements form one 64-bit count -
; confirm against the ISA reference.
668 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
669 ; CHECK-LABEL: @sse2_psll_w_0
670 ; CHECK-NEXT: ret <8 x i16> %v
671 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
675 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
676 ; CHECK-LABEL: @sse2_psll_w_15
677 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
678 ; CHECK-NEXT: ret <8 x i16> %1
679 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
683 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
684 ; CHECK-LABEL: @sse2_psll_w_15_splat
685 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
686 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
690 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
691 ; CHECK-LABEL: @sse2_psll_w_64
692 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
693 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; SSE2 psll.d: left shift of <4 x i32> where the count is a constant
; <4 x i32> vector.
; With the count only in element 0 (element 1 zero, elements 2-3 set to 9999),
; 15 is expected to become a shl by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably elements 0 and 1 form one 64-bit count - confirm
; against the ISA reference.
697 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
698 ; CHECK-LABEL: @sse2_psll_d_0
699 ; CHECK-NEXT: ret <4 x i32> %v
700 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
704 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
705 ; CHECK-LABEL: @sse2_psll_d_15
706 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
707 ; CHECK-NEXT: ret <4 x i32> %1
708 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
712 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
713 ; CHECK-LABEL: @sse2_psll_d_15_splat
714 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
715 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
719 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
720 ; CHECK-LABEL: @sse2_psll_d_64
721 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
722 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; SSE2 psll.q: left shift of <2 x i64> where the count is a constant
; <2 x i64> vector.
; Element 0 holds the count and element 1 is 9999 in the _15 and _64 cases;
; the expected result follows element 0 alone (15 becomes a shl by 15,
; 64 folds to an all-zero vector).
726 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
727 ; CHECK-LABEL: @sse2_psll_q_0
728 ; CHECK-NEXT: ret <2 x i64> %v
729 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
733 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
734 ; CHECK-LABEL: @sse2_psll_q_15
735 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
736 ; CHECK-NEXT: ret <2 x i64> %1
737 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
741 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
742 ; CHECK-LABEL: @sse2_psll_q_64
743 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
744 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; AVX2 psll.w: left shift of <16 x i16>; the count operand is a constant
; <8 x i16> vector.
; With the count only in element 0 (elements 1-3 zero, elements 4-7 set to
; 9999), 15 is expected to become a shl by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably the low four elements form one 64-bit count -
; confirm against the ISA reference.
748 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
749 ; CHECK-LABEL: @avx2_psll_w_0
750 ; CHECK-NEXT: ret <16 x i16> %v
751 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
755 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
756 ; CHECK-LABEL: @avx2_psll_w_15
757 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
758 ; CHECK-NEXT: ret <16 x i16> %1
759 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
763 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
764 ; CHECK-LABEL: @avx2_psll_w_15_splat
765 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
766 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
770 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
771 ; CHECK-LABEL: @avx2_psll_w_64
772 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
773 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX2 psll.d: left shift of <8 x i32>; the count operand is a constant
; <4 x i32> vector.
; With the count only in element 0 (element 1 zero, elements 2-3 set to 9999),
; 15 is expected to become a shl by 15 and 64 an all-zero result.
; The all-15 splat is expected to fold to zero as well.
; NOTE(review): presumably elements 0 and 1 form one 64-bit count - confirm
; against the ISA reference.
777 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
778 ; CHECK-LABEL: @avx2_psll_d_0
779 ; CHECK-NEXT: ret <8 x i32> %v
780 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
784 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
785 ; CHECK-LABEL: @avx2_psll_d_15
786 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
787 ; CHECK-NEXT: ret <8 x i32> %1
788 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
792 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
793 ; CHECK-LABEL: @avx2_psll_d_15_splat
794 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
795 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
799 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
800 ; CHECK-LABEL: @avx2_psll_d_64
801 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
802 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX2 psll.q: left shift of <4 x i64>; the count operand is a constant
; <2 x i64> vector.
; Element 0 holds the count and element 1 is 9999 in the _15 and _64 cases;
; the expected result follows element 0 alone (15 becomes a shl by 15,
; 64 folds to an all-zero vector).
806 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
807 ; CHECK-LABEL: @avx2_psll_q_0
808 ; CHECK-NEXT: ret <4 x i64> %v
809 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
813 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
814 ; CHECK-LABEL: @avx2_psll_q_15
815 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
816 ; CHECK-NEXT: ret <4 x i64> %1
817 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
821 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
822 ; CHECK-LABEL: @avx2_psll_q_64
823 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
824 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
829 ; Vector Demanded Bits
; Arithmetic right shifts (psra.w / psra.d, SSE2 and AVX2) with a variable
; count %a that first goes through a shufflevector. Each shuffle keeps the
; low half of %a in place (elements 0-3 for i16, elements 0-1 for i32) and
; only overwrites the upper half with copies of the low half.
; The expected output calls the intrinsic on %a directly, i.e. the shuffle is
; expected to be removed.
; NOTE(review): presumably because only the low 64 bits of the count operand
; are demanded by these intrinsics - confirm against the ISA reference.
832 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
833 ; CHECK-LABEL: @sse2_psra_w_var
834 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
835 ; CHECK-NEXT: ret <8 x i16> %1
836 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
837 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
841 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
842 ; CHECK-LABEL: @sse2_psra_d_var
843 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
844 ; CHECK-NEXT: ret <4 x i32> %1
845 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
846 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
850 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
851 ; CHECK-LABEL: @avx2_psra_w_var
852 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
853 ; CHECK-NEXT: ret <16 x i16> %1
854 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
855 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
859 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
860 ; CHECK-LABEL: @avx2_psra_d_var
861 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
862 ; CHECK-NEXT: ret <8 x i32> %1
863 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
864 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
868 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
869 ; CHECK-LABEL: @sse2_psrl_w_var
870 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
871 ; CHECK-NEXT: ret <8 x i16> %1
872 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
873 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
877 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
878 ; CHECK-LABEL: @sse2_psrl_d_var
879 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
880 ; CHECK-NEXT: ret <4 x i32> %1
881 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
882 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
; @llvm.x86.sse2.psrl.q with a shuffled count operand.
; The shuffle mask keeps element 0 of %a in place and only overwrites
; element 1, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
886 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
887 ; CHECK-LABEL: @sse2_psrl_q_var
888 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
889 ; CHECK-NEXT: ret <2 x i64> %1
890 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
891 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
; @llvm.x86.avx2.psrl.w with a shuffled count operand.
; The shuffle mask keeps elements 0-3 of %a in place and only overwrites
; elements 4-7, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
895 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
896 ; CHECK-LABEL: @avx2_psrl_w_var
897 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
898 ; CHECK-NEXT: ret <16 x i16> %1
899 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
900 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; @llvm.x86.avx2.psrl.d with a shuffled count operand.
; The shuffle mask keeps elements 0-1 of %a in place and only overwrites
; elements 2-3, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
904 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
905 ; CHECK-LABEL: @avx2_psrl_d_var
906 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
907 ; CHECK-NEXT: ret <8 x i32> %1
908 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
909 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; @llvm.x86.avx2.psrl.q with a shuffled count operand.
; The shuffle mask keeps element 0 of %a in place and only overwrites
; element 1, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
913 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
914 ; CHECK-LABEL: @avx2_psrl_q_var
915 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
916 ; CHECK-NEXT: ret <4 x i64> %1
917 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
918 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
; @llvm.x86.sse2.psll.w with a shuffled count operand.
; The shuffle mask keeps elements 0-3 of %a in place and only overwrites
; elements 4-7, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
922 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
923 ; CHECK-LABEL: @sse2_psll_w_var
924 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
925 ; CHECK-NEXT: ret <8 x i16> %1
926 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
927 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
; @llvm.x86.sse2.psll.d with a shuffled count operand.
; The shuffle mask keeps elements 0-1 of %a in place and only overwrites
; elements 2-3, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
931 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
932 ; CHECK-LABEL: @sse2_psll_d_var
933 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
934 ; CHECK-NEXT: ret <4 x i32> %1
935 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
936 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
; @llvm.x86.sse2.psll.q with a shuffled count operand.
; The shuffle mask keeps element 0 of %a in place and only overwrites
; element 1, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
940 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
941 ; CHECK-LABEL: @sse2_psll_q_var
942 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
943 ; CHECK-NEXT: ret <2 x i64> %1
944 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
945 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
; @llvm.x86.avx2.psll.w with a shuffled count operand.
; The shuffle mask keeps elements 0-3 of %a in place and only overwrites
; elements 4-7, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
949 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
950 ; CHECK-LABEL: @avx2_psll_w_var
951 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
952 ; CHECK-NEXT: ret <16 x i16> %1
953 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
954 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
; @llvm.x86.avx2.psll.d with a shuffled count operand.
; The shuffle mask keeps elements 0-1 of %a in place and only overwrites
; elements 2-3, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
958 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
959 ; CHECK-LABEL: @avx2_psll_d_var
960 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
961 ; CHECK-NEXT: ret <8 x i32> %1
962 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
963 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
; @llvm.x86.avx2.psll.q with a shuffled count operand.
; The shuffle mask keeps element 0 of %a in place and only overwrites
; element 1, so the low 64 bits of the count are unchanged. The expected
; output calls the intrinsic directly on %a, i.e. the shuffle is dropped.
967 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
968 ; CHECK-LABEL: @avx2_psll_q_var
969 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
970 ; CHECK-NEXT: ret <4 x i64> %1
971 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
972 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
; Chain of three SSE2 i16 arithmetic-right-shift intrinsics: psrai.w by 0,
; psra.w with a constant count vector, psrai.w by 0.
; The count vector is zero in elements 0-3 (its low 64 bits); the only
; non-zero value (7) sits in element 4. The expected output returns %A
; unchanged, so the whole chain is expected to fold away.
980 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
981 ; CHECK-LABEL: @test_sse2_psra_w_0
982 ; CHECK-NEXT: ret <8 x i16> %A
983 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
984 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
985 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
; Constant-folding test for a chain of SSE2 i16 arithmetic right shifts.
; The input constant, viewed as i16 elements, is 0x8000, 0x4000, 0x2000,
; 0x1000 per 64-bit half. The shifts are psrai.w by 3, psra.w with a count
; vector whose low 64 bits hold 3 (the 7 in element 4 lies in the upper
; half), and psrai.w by 2. The expected constant equals each input element
; shifted right arithmetically by 8 (3 + 3 + 2).
989 define <8 x i16> @test_sse2_psra_w_8() {
990 ; CHECK-LABEL: @test_sse2_psra_w_8
991 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
992 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
993 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
994 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
995 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
; Chain of three SSE2 i32 arithmetic-right-shift intrinsics: psrai.d by 0,
; psra.d with a constant count vector, psrai.d by 0.
; The count vector is zero in elements 0-1 (its low 64 bits); the only
; non-zero value (7) sits in element 2. The expected output returns %A
; unchanged, so the whole chain is expected to fold away.
; The last shift consumes %2 so that the psra.d call is actually part of the
; tested chain (feeding it %1 would leave psra.d dead and untested).
999 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
1000 ; CHECK-LABEL: @test_sse2_psra_d_0
1001 ; CHECK-NEXT: ret <4 x i32> %A
1002 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
1003 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1004 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
; Constant-folding test for a chain of SSE2 i32 arithmetic right shifts.
; The input constant, viewed as i32 elements, is 0x40008000, 0x10002000 per
; 64-bit half. The shifts are psrai.d by 3, psra.d with a count vector whose
; low 64 bits hold 3 (the 7 in element 2 lies in the upper half), and
; psrai.d by 2. The expected constant equals each input element shifted
; right arithmetically by 8 (3 + 3 + 2).
; NOTE(review): the neighbouring psra tests are named test_*; this one lacks
; the prefix.
1008 define <4 x i32> @sse2_psra_d_8() {
1009 ; CHECK-LABEL: @sse2_psra_d_8
1010 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1011 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
1012 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
1013 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1014 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
; Chain of three AVX2 i16 arithmetic-right-shift intrinsics: psrai.w by 0,
; psra.w with a constant count vector, psrai.w by 0.
; The count vector is zero in elements 0-3 (its low 64 bits); the only
; non-zero value (7) sits in element 4. The expected output returns %A
; unchanged, so the whole chain is expected to fold away.
1018 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
1019 ; CHECK-LABEL: @test_avx2_psra_w_0
1020 ; CHECK-NEXT: ret <16 x i16> %A
1021 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
1022 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1023 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
; Constant-folding test for a chain of AVX2 i16 arithmetic right shifts.
; The input constant, viewed as i16 elements, is 0x8000, 0x4000, 0x2000,
; 0x1000 per 64-bit group. The shifts are psrai.w by 3, psra.w with a count
; vector whose low 64 bits hold 3 (the 7 in element 4 lies in the upper
; half), and psrai.w by 2. The expected constant equals each input element
; shifted right arithmetically by 8 (3 + 3 + 2).
; NOTE(review): parameter %A is not used by the visible body.
1027 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
1028 ; CHECK-LABEL: @test_avx2_psra_w_8
1029 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
1030 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
1031 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
1032 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1033 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
; Chain of three AVX2 i32 arithmetic-right-shift intrinsics: psrai.d by 0,
; psra.d with a constant count vector, psrai.d by 0.
; The count vector is zero in elements 0-1 (its low 64 bits); the only
; non-zero value (7) sits in element 2. The expected output returns %A
; unchanged, so the whole chain is expected to fold away.
1037 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
1038 ; CHECK-LABEL: @test_avx2_psra_d_0
1039 ; CHECK-NEXT: ret <8 x i32> %A
1040 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
1041 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1042 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
; Constant-folding test for a chain of AVX2 i32 arithmetic right shifts.
; The input constant, viewed as i32 elements, is 0x40008000, 0x10002000 per
; 64-bit group. The shifts are psrai.d by 3, psra.d with a count vector whose
; low 64 bits hold 3 (the 7 in element 2 lies in the upper half), and
; psrai.d by 2. The expected constant equals each input element shifted
; right arithmetically by 8 (3 + 3 + 2).
1046 define <8 x i32> @test_avx2_psra_d_8() {
1047 ; CHECK-LABEL: @test_avx2_psra_d_8
1048 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1049 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
1050 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
1051 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1052 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
; Constant-folding test for the SSE2 psll/pslli intrinsics with count 1.
; %S is the constant 1 routed through a same-type bitcast. %3 is the vector
; count <1, 0> (as i64), reinterpreted as needed for the w/d/q variants.
; The constant <1..8 x i16> goes through six shifts in sequence:
; psll.w, psll.d, psll.q (vector count) then pslli.w, pslli.d, pslli.q
; (scalar count %S), with bitcasts between element widths.
; The expected constant corresponds to every i16 element multiplied by 64,
; i.e. a total left shift of 6.
1056 define <2 x i64> @test_sse2_1() {
1057 %S = bitcast i32 1 to i32
1058 %1 = zext i32 %S to i64
1059 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1060 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1061 %4 = bitcast <2 x i64> %3 to <8 x i16>
1062 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1063 %6 = bitcast <8 x i16> %5 to <4 x i32>
1064 %7 = bitcast <2 x i64> %3 to <4 x i32>
1065 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1066 %9 = bitcast <4 x i32> %8 to <2 x i64>
1067 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1068 %11 = bitcast <2 x i64> %10 to <8 x i16>
1069 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1070 %13 = bitcast <8 x i16> %12 to <4 x i32>
1071 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1072 %15 = bitcast <4 x i32> %14 to <2 x i64>
1073 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1075 ; CHECK: test_sse2_1
1076 ; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
; Constant-folding test for the AVX2 psll/pslli intrinsics with count 1.
; %S is the constant 1 routed through a same-type bitcast. %3 is the vector
; count <1, 0> (as i64), reinterpreted as needed for the w/d/q variants.
; The input constant holds 1, 2, 3, 4 in the lowest i16 of each 64-bit group.
; It goes through six shifts in sequence: psll.w, psll.d, psll.q (vector
; count) then pslli.w, pslli.d, pslli.q (scalar count %S).
; The expected constant is <64, 128, 192, 256>, i.e. a total left shift of 6.
1079 define <4 x i64> @test_avx2_1() {
1080 %S = bitcast i32 1 to i32
1081 %1 = zext i32 %S to i64
1082 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1083 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1084 %4 = bitcast <2 x i64> %3 to <8 x i16>
1085 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1086 %6 = bitcast <16 x i16> %5 to <8 x i32>
1087 %7 = bitcast <2 x i64> %3 to <4 x i32>
1088 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1089 %9 = bitcast <8 x i32> %8 to <4 x i64>
1090 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1091 %11 = bitcast <4 x i64> %10 to <16 x i16>
1092 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1093 %13 = bitcast <16 x i16> %12 to <8 x i32>
1094 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1095 %15 = bitcast <8 x i32> %14 to <4 x i64>
1096 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1098 ; CHECK: test_avx2_1
1099 ; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
; Constant-folding test for the SSE2 psll/pslli intrinsics with count 128.
; %S is the constant 128 routed through a same-type bitcast. %3 is the
; vector count <128, 0> (as i64), reinterpreted for the w/d/q variants.
; 128 exceeds every element width used here (16, 32, 64); the expected
; result of the six-shift chain is an all-zero vector.
1102 define <2 x i64> @test_sse2_0() {
1103 %S = bitcast i32 128 to i32
1104 %1 = zext i32 %S to i64
1105 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1106 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1107 %4 = bitcast <2 x i64> %3 to <8 x i16>
1108 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1109 %6 = bitcast <8 x i16> %5 to <4 x i32>
1110 %7 = bitcast <2 x i64> %3 to <4 x i32>
1111 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1112 %9 = bitcast <4 x i32> %8 to <2 x i64>
1113 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1114 %11 = bitcast <2 x i64> %10 to <8 x i16>
1115 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1116 %13 = bitcast <8 x i16> %12 to <4 x i32>
1117 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1118 %15 = bitcast <4 x i32> %14 to <2 x i64>
1119 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1121 ; CHECK: test_sse2_0
1122 ; CHECK: ret <2 x i64> zeroinitializer
; Constant-folding test for the AVX2 psll/pslli intrinsics with count 128.
; %S is the constant 128 routed through a same-type bitcast. %3 is the
; vector count <128, 0> (as i64), reinterpreted for the w/d/q variants.
; 128 exceeds every element width used here (16, 32, 64); the expected
; result of the six-shift chain is an all-zero vector.
1125 define <4 x i64> @test_avx2_0() {
1126 %S = bitcast i32 128 to i32
1127 %1 = zext i32 %S to i64
1128 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1129 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1130 %4 = bitcast <2 x i64> %3 to <8 x i16>
1131 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1132 %6 = bitcast <16 x i16> %5 to <8 x i32>
1133 %7 = bitcast <2 x i64> %3 to <4 x i32>
1134 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1135 %9 = bitcast <8 x i32> %8 to <4 x i64>
1136 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1137 %11 = bitcast <4 x i64> %10 to <16 x i16>
1138 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1139 %13 = bitcast <16 x i16> %12 to <8 x i32>
1140 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1141 %15 = bitcast <8 x i32> %14 to <4 x i64>
1142 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1144 ; CHECK: test_avx2_0
1145 ; CHECK: ret <4 x i64> zeroinitializer
; Constant-folding test for the SSE2 psrl/psrli intrinsics with count 1.
; %S is the constant 1 routed through a same-type bitcast. %3 is the vector
; count <1, 0> (as i64), reinterpreted as needed for the w/d/q variants.
; A constant <8 x i16> goes through six shifts in sequence: psrl.w, psrl.d,
; psrl.q (vector count) then psrli.w, psrli.d, psrli.q (scalar count %S),
; with bitcasts between element widths. The chain is expected to fold to a
; single constant return value.
1147 define <2 x i64> @test_sse2_psrl_1() {
1148 %S = bitcast i32 1 to i32
1149 %1 = zext i32 %S to i64
1150 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1151 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1152 %4 = bitcast <2 x i64> %3 to <8 x i16>
1153 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
1154 %6 = bitcast <8 x i16> %5 to <4 x i32>
1155 %7 = bitcast <2 x i64> %3 to <4 x i32>
1156 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1157 %9 = bitcast <4 x i32> %8 to <2 x i64>
1158 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1159 %11 = bitcast <2 x i64> %10 to <8 x i16>
1160 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1161 %13 = bitcast <8 x i16> %12 to <4 x i32>
1162 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1163 %15 = bitcast <4 x i32> %14 to <2 x i64>
1164 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1166 ; CHECK: test_sse2_psrl_1
1167 ; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
; Constant-folding test for the AVX2 psrl/psrli intrinsics with count 1.
; %S is the constant 1 routed through a same-type bitcast. %3 is the vector
; count <1, 0> (as i64), reinterpreted as needed for the w/d/q variants.
; The input constant holds 1024, 2048, 4096, 8192 in the lowest i16 of each
; 64-bit group. It goes through six shifts in sequence: psrl.w, psrl.d,
; psrl.q (vector count) then psrli.w, psrli.d, psrli.q (scalar count %S).
; The expected constant is <16, 32, 64, 128>, i.e. a total right shift of 6.
1170 define <4 x i64> @test_avx2_psrl_1() {
1171 %S = bitcast i32 1 to i32
1172 %1 = zext i32 %S to i64
1173 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1174 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1175 %4 = bitcast <2 x i64> %3 to <8 x i16>
1176 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1177 %6 = bitcast <16 x i16> %5 to <8 x i32>
1178 %7 = bitcast <2 x i64> %3 to <4 x i32>
1179 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1180 %9 = bitcast <8 x i32> %8 to <4 x i64>
1181 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1182 %11 = bitcast <4 x i64> %10 to <16 x i16>
1183 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1184 %13 = bitcast <16 x i16> %12 to <8 x i32>
1185 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1186 %15 = bitcast <8 x i32> %14 to <4 x i64>
1187 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1189 ; CHECK: test_avx2_psrl_1
1190 ; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
; Constant-folding test for the SSE2 psrl/psrli intrinsics with count 128.
; %S is the constant 128 routed through a same-type bitcast. %3 is the
; vector count <128, 0> (as i64), reinterpreted for the w/d/q variants.
; 128 exceeds every element width used here (16, 32, 64); the expected
; result of the six-shift chain is an all-zero vector.
1193 define <2 x i64> @test_sse2_psrl_0() {
1194 %S = bitcast i32 128 to i32
1195 %1 = zext i32 %S to i64
1196 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1197 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1198 %4 = bitcast <2 x i64> %3 to <8 x i16>
1199 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
1200 %6 = bitcast <8 x i16> %5 to <4 x i32>
1201 %7 = bitcast <2 x i64> %3 to <4 x i32>
1202 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1203 %9 = bitcast <4 x i32> %8 to <2 x i64>
1204 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1205 %11 = bitcast <2 x i64> %10 to <8 x i16>
1206 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1207 %13 = bitcast <8 x i16> %12 to <4 x i32>
1208 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1209 %15 = bitcast <4 x i32> %14 to <2 x i64>
1210 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1212 ; CHECK: test_sse2_psrl_0
1213 ; CHECK: ret <2 x i64> zeroinitializer
; Constant-folding test for the AVX2 psrl/psrli intrinsics with count 128.
; %S is the constant 128 routed through a same-type bitcast. %3 is the
; vector count <128, 0> (as i64), reinterpreted for the w/d/q variants.
; 128 exceeds every element width used here (16, 32, 64); the expected
; result of the six-shift chain is an all-zero vector.
1216 define <4 x i64> @test_avx2_psrl_0() {
1217 %S = bitcast i32 128 to i32
1218 %1 = zext i32 %S to i64
1219 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1220 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1221 %4 = bitcast <2 x i64> %3 to <8 x i16>
1222 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1223 %6 = bitcast <16 x i16> %5 to <8 x i32>
1224 %7 = bitcast <2 x i64> %3 to <4 x i32>
1225 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1226 %9 = bitcast <8 x i32> %8 to <4 x i64>
1227 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1228 %11 = bitcast <4 x i64> %10 to <16 x i16>
1229 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1230 %13 = bitcast <16 x i16> %12 to <8 x i32>
1231 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1232 %15 = bitcast <8 x i32> %14 to <4 x i64>
1233 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1235 ; CHECK: test_avx2_psrl_0
1236 ; CHECK: ret <4 x i64> zeroinitializer
; Declarations of the x86 shift intrinsics used by the tests in this file.
; Names ending in "i" (pslli/psrli/psrai) take a scalar i32 count; the others
; take the count as a 128-bit vector. All share attribute group #1.

; psll / pslli family (AVX2 then SSE2).
1239 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
1240 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
1241 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
1242 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
1243 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
1244 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
1245 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
1246 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
1247 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
1248 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
1249 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
1250 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
; psrl / psrli family (AVX2 then SSE2).
1252 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
1253 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
1254 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
1255 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
1256 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
1257 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
1258 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
1259 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
1260 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
1261 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
1262 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
1263 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
; psra / psrai family (AVX2 then SSE2); only w and d element widths are declared.
1265 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
1266 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
1267 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
1268 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
1269 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
1270 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
1271 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
1272 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
; Attribute group applied to every declaration above.
1274 attributes #1 = { nounwind readnone }