1 ; RUN: opt < %s -instcombine -S | FileCheck %s
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; psrai.w with immediate 0: the expected output returns %v unchanged.
8 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
9 ; CHECK-LABEL: @sse2_psrai_w_0
10 ; CHECK-NEXT: ret <8 x i16> %v
11 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
; psrai.w with immediate 15: expected to become a plain ashr by a splat of 15.
15 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
16 ; CHECK-LABEL: @sse2_psrai_w_15
17 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
18 ; CHECK-NEXT: ret <8 x i16> %1
19 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
; psrai.w with immediate 64 (wider than an i16 lane): expected ashr by a splat of 15.
23 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
24 ; CHECK-LABEL: @sse2_psrai_w_64
25 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
26 ; CHECK-NEXT: ret <8 x i16> %1
27 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
; psrai.d with immediate 0: the expected output returns %v unchanged.
31 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
32 ; CHECK-LABEL: @sse2_psrai_d_0
33 ; CHECK-NEXT: ret <4 x i32> %v
34 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
; psrai.d with immediate 15: expected to become a plain ashr by a splat of 15.
38 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
39 ; CHECK-LABEL: @sse2_psrai_d_15
40 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
41 ; CHECK-NEXT: ret <4 x i32> %1
42 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
; psrai.d with immediate 64 (wider than an i32 lane): expected ashr by a splat of 31.
46 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
47 ; CHECK-LABEL: @sse2_psrai_d_64
48 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
49 ; CHECK-NEXT: ret <4 x i32> %1
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
; 256-bit psrai.w with immediate 0: the expected output returns %v unchanged.
54 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
55 ; CHECK-LABEL: @avx2_psrai_w_0
56 ; CHECK-NEXT: ret <16 x i16> %v
57 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
; 256-bit psrai.w with immediate 15: expected to become a plain ashr by a splat of 15.
61 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
62 ; CHECK-LABEL: @avx2_psrai_w_15
63 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
64 ; CHECK-NEXT: ret <16 x i16> %1
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
; 256-bit psrai.w with immediate 64 (wider than an i16 lane): expected ashr by a splat of 15.
69 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
70 ; CHECK-LABEL: @avx2_psrai_w_64
71 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
72 ; CHECK-NEXT: ret <16 x i16> %1
73 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
; 256-bit psrai.d with immediate 0: the expected output returns %v unchanged.
77 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
78 ; CHECK-LABEL: @avx2_psrai_d_0
79 ; CHECK-NEXT: ret <8 x i32> %v
80 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
; 256-bit psrai.d with immediate 15: expected to become a plain ashr by a splat of 15.
84 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
85 ; CHECK-LABEL: @avx2_psrai_d_15
86 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
87 ; CHECK-NEXT: ret <8 x i32> %1
88 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
; 256-bit psrai.d with immediate 64 (wider than an i32 lane): expected ashr by a splat of 31.
92 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
93 ; CHECK-LABEL: @avx2_psrai_d_64
94 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
95 ; CHECK-NEXT: ret <8 x i32> %1
96 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
; psrli.w with immediate 0: the expected output returns %v unchanged.
104 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
105 ; CHECK-LABEL: @sse2_psrli_w_0
106 ; CHECK-NEXT: ret <8 x i16> %v
107 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
; psrli.w with immediate 15: expected to become a plain lshr by a splat of 15.
111 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
112 ; CHECK-LABEL: @sse2_psrli_w_15
113 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
114 ; CHECK-NEXT: ret <8 x i16> %1
115 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
; psrli.w with immediate 64 (wider than an i16 lane): expected to fold to an all-zero vector.
119 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
120 ; CHECK-LABEL: @sse2_psrli_w_64
121 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
122 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
; psrli.d with immediate 0: the expected output returns %v unchanged.
126 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
127 ; CHECK-LABEL: @sse2_psrli_d_0
128 ; CHECK-NEXT: ret <4 x i32> %v
129 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
; psrli.d with immediate 15: expected to become a plain lshr by a splat of 15.
133 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
134 ; CHECK-LABEL: @sse2_psrli_d_15
135 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
136 ; CHECK-NEXT: ret <4 x i32> %1
137 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
; psrli.d with immediate 64 (wider than an i32 lane): expected to fold to an all-zero vector.
141 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
142 ; CHECK-LABEL: @sse2_psrli_d_64
143 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
144 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
; psrli.q with immediate 0: the expected output returns %v unchanged.
148 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
149 ; CHECK-LABEL: @sse2_psrli_q_0
150 ; CHECK-NEXT: ret <2 x i64> %v
151 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
; psrli.q with immediate 15: expected to become a plain lshr by a splat of 15.
155 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
156 ; CHECK-LABEL: @sse2_psrli_q_15
157 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
158 ; CHECK-NEXT: ret <2 x i64> %1
159 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
; psrli.q with immediate 64 (equal to the i64 lane width): expected to fold to an all-zero vector.
163 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
164 ; CHECK-LABEL: @sse2_psrli_q_64
165 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
166 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
; 256-bit psrli.w with immediate 0: the expected output returns %v unchanged.
170 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
171 ; CHECK-LABEL: @avx2_psrli_w_0
172 ; CHECK-NEXT: ret <16 x i16> %v
173 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
; 256-bit psrli.w with immediate 15: expected to become a plain lshr by a splat of 15.
177 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
178 ; CHECK-LABEL: @avx2_psrli_w_15
179 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
180 ; CHECK-NEXT: ret <16 x i16> %1
181 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
; 256-bit psrli.w with immediate 64 (wider than an i16 lane): expected to fold to an all-zero vector.
185 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
186 ; CHECK-LABEL: @avx2_psrli_w_64
187 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
188 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
; 256-bit psrli.d with immediate 0: the expected output returns %v unchanged.
192 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
193 ; CHECK-LABEL: @avx2_psrli_d_0
194 ; CHECK-NEXT: ret <8 x i32> %v
195 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
; 256-bit psrli.d with immediate 15: expected to become a plain lshr by a splat of 15.
199 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
200 ; CHECK-LABEL: @avx2_psrli_d_15
201 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
202 ; CHECK-NEXT: ret <8 x i32> %1
203 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
; 256-bit psrli.d with immediate 64 (wider than an i32 lane): expected to fold to an all-zero vector.
207 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
208 ; CHECK-LABEL: @avx2_psrli_d_64
209 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
210 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
; 256-bit psrli.q with immediate 0: the expected output returns %v unchanged.
214 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
215 ; CHECK-LABEL: @avx2_psrli_q_0
216 ; CHECK-NEXT: ret <4 x i64> %v
217 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
; 256-bit psrli.q with immediate 15: expected to become a plain lshr by a splat of 15.
221 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
222 ; CHECK-LABEL: @avx2_psrli_q_15
223 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
224 ; CHECK-NEXT: ret <4 x i64> %1
225 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
; 256-bit psrli.q with immediate 64 (equal to the i64 lane width): expected to fold to an all-zero vector.
229 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
230 ; CHECK-LABEL: @avx2_psrli_q_64
231 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
232 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
; pslli.w with immediate 0: the expected output returns %v unchanged.
240 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
241 ; CHECK-LABEL: @sse2_pslli_w_0
242 ; CHECK-NEXT: ret <8 x i16> %v
243 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
; pslli.w with immediate 15: expected to become a plain shl by a splat of 15.
247 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
248 ; CHECK-LABEL: @sse2_pslli_w_15
249 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
250 ; CHECK-NEXT: ret <8 x i16> %1
251 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
; pslli.w with immediate 64 (wider than an i16 lane): expected to fold to an all-zero vector.
255 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
256 ; CHECK-LABEL: @sse2_pslli_w_64
257 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
258 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
; pslli.d with immediate 0: the expected output returns %v unchanged.
262 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
263 ; CHECK-LABEL: @sse2_pslli_d_0
264 ; CHECK-NEXT: ret <4 x i32> %v
265 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
; pslli.d with immediate 15: expected to become a plain shl by a splat of 15.
269 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
270 ; CHECK-LABEL: @sse2_pslli_d_15
271 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
272 ; CHECK-NEXT: ret <4 x i32> %1
273 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
; pslli.d with immediate 64 (wider than an i32 lane): expected to fold to an all-zero vector.
277 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
278 ; CHECK-LABEL: @sse2_pslli_d_64
279 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
280 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
; pslli.q with immediate 0: the expected output returns %v unchanged.
284 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
285 ; CHECK-LABEL: @sse2_pslli_q_0
286 ; CHECK-NEXT: ret <2 x i64> %v
287 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
; pslli.q with immediate 15: expected to become a plain shl by a splat of 15.
291 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
292 ; CHECK-LABEL: @sse2_pslli_q_15
293 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
294 ; CHECK-NEXT: ret <2 x i64> %1
295 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
; pslli.q with immediate 64 (equal to the i64 lane width): expected to fold to an all-zero vector.
299 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
300 ; CHECK-LABEL: @sse2_pslli_q_64
301 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
302 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
; 256-bit pslli.w with immediate 0: the expected output returns %v unchanged.
306 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
307 ; CHECK-LABEL: @avx2_pslli_w_0
308 ; CHECK-NEXT: ret <16 x i16> %v
309 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
; 256-bit pslli.w with immediate 15: expected to become a plain shl by a splat of 15.
313 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
314 ; CHECK-LABEL: @avx2_pslli_w_15
315 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
316 ; CHECK-NEXT: ret <16 x i16> %1
317 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
; 256-bit pslli.w with immediate 64 (wider than an i16 lane): expected to fold to an all-zero vector.
321 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
322 ; CHECK-LABEL: @avx2_pslli_w_64
323 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
324 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
; 256-bit pslli.d with immediate 0: the expected output returns %v unchanged.
328 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
329 ; CHECK-LABEL: @avx2_pslli_d_0
330 ; CHECK-NEXT: ret <8 x i32> %v
331 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
; 256-bit pslli.d with immediate 15: expected to become a plain shl by a splat of 15.
335 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
336 ; CHECK-LABEL: @avx2_pslli_d_15
337 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
338 ; CHECK-NEXT: ret <8 x i32> %1
339 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
; 256-bit pslli.d with immediate 64 (wider than an i32 lane): expected to fold to an all-zero vector.
343 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
344 ; CHECK-LABEL: @avx2_pslli_d_64
345 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
346 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
; 256-bit pslli.q with immediate 0: the expected output returns %v unchanged.
350 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
351 ; CHECK-LABEL: @avx2_pslli_q_0
352 ; CHECK-NEXT: ret <4 x i64> %v
353 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
; 256-bit pslli.q with immediate 15: expected to become a plain shl by a splat of 15.
357 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
358 ; CHECK-LABEL: @avx2_pslli_q_15
359 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
360 ; CHECK-NEXT: ret <4 x i64> %1
361 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
; 256-bit pslli.q with immediate 64 (equal to the i64 lane width): expected to fold to an all-zero vector.
365 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
366 ; CHECK-LABEL: @avx2_pslli_q_64
367 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
368 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
373 ; ASHR - Constant Vector
; psra.w with an all-zero count vector: the expected output returns %v unchanged.
376 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
377 ; CHECK-LABEL: @sse2_psra_w_0
378 ; CHECK-NEXT: ret <8 x i16> %v
379 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; psra.w with count lanes <15, 0, 0, 0, 9999 x4>: expected ashr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm against the
; intrinsic's definition.
383 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
384 ; CHECK-LABEL: @sse2_psra_w_15
385 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
386 ; CHECK-NEXT: ret <8 x i16> %1
387 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psra.w with every count lane set to 15: expected ashr by a splat of 15.
391 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
392 ; CHECK-LABEL: @sse2_psra_w_15_splat
393 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
394 ; CHECK-NEXT: ret <8 x i16> %1
395 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; psra.w with 64 in count lane 0 (wider than an i16 lane): expected ashr by a splat of 15.
399 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
400 ; CHECK-LABEL: @sse2_psra_w_64
401 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
402 ; CHECK-NEXT: ret <8 x i16> %1
403 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psra.d with an all-zero count vector: the expected output returns %v unchanged.
407 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
408 ; CHECK-LABEL: @sse2_psra_d_0
409 ; CHECK-NEXT: ret <4 x i32> %v
410 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; psra.d with count lanes <15, 0, 9999, 9999>: expected ashr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
414 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
415 ; CHECK-LABEL: @sse2_psra_d_15
416 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
417 ; CHECK-NEXT: ret <4 x i32> %1
418 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; psra.d with every count lane set to 15: expected ashr by a splat of 31, not 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that exceeds 31 - confirm.
422 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
423 ; CHECK-LABEL: @sse2_psra_d_15_splat
424 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
425 ; CHECK-NEXT: ret <4 x i32> %1
426 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; psra.d with 64 in count lane 0 (wider than an i32 lane): expected ashr by a splat of 31.
430 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
431 ; CHECK-LABEL: @sse2_psra_d_64
432 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
433 ; CHECK-NEXT: ret <4 x i32> %1
434 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; 256-bit psra.w (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
438 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
439 ; CHECK-LABEL: @avx2_psra_w_0
440 ; CHECK-NEXT: ret <16 x i16> %v
441 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; 256-bit psra.w with count lanes <15, 0, 0, 0, 9999 x4>: expected ashr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
445 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
446 ; CHECK-LABEL: @avx2_psra_w_15
447 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
448 ; CHECK-NEXT: ret <16 x i16> %1
449 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psra.w with every count lane set to 15: expected ashr by a splat of 15.
453 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
454 ; CHECK-LABEL: @avx2_psra_w_15_splat
455 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
456 ; CHECK-NEXT: ret <16 x i16> %1
457 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; 256-bit psra.w with 64 in count lane 0 (wider than an i16 lane): expected ashr by a splat of 15.
461 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
462 ; CHECK-LABEL: @avx2_psra_w_64
463 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
464 ; CHECK-NEXT: ret <16 x i16> %1
465 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psra.d (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
469 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
470 ; CHECK-LABEL: @avx2_psra_d_0
471 ; CHECK-NEXT: ret <8 x i32> %v
472 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; 256-bit psra.d with count lanes <15, 0, 9999, 9999>: expected ashr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
476 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
477 ; CHECK-LABEL: @avx2_psra_d_15
478 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
479 ; CHECK-NEXT: ret <8 x i32> %1
480 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; 256-bit psra.d with every count lane set to 15: expected ashr by a splat of 31, not 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that exceeds 31 - confirm.
484 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
485 ; CHECK-LABEL: @avx2_psra_d_15_splat
486 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
487 ; CHECK-NEXT: ret <8 x i32> %1
488 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; 256-bit psra.d with 64 in count lane 0 (wider than an i32 lane): expected ashr by a splat of 31.
492 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
493 ; CHECK-LABEL: @avx2_psra_d_64
494 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
495 ; CHECK-NEXT: ret <8 x i32> %1
496 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
501 ; LSHR - Constant Vector
; psrl.w with an all-zero count vector: the expected output returns %v unchanged.
504 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
505 ; CHECK-LABEL: @sse2_psrl_w_0
506 ; CHECK-NEXT: ret <8 x i16> %v
507 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; psrl.w with count lanes <15, 0, 0, 0, 9999 x4>: expected lshr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
511 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
512 ; CHECK-LABEL: @sse2_psrl_w_15
513 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
514 ; CHECK-NEXT: ret <8 x i16> %1
515 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psrl.w with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0-3 are read together as one 64-bit count
; that is out of range for an i16 lane - confirm.
519 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
520 ; CHECK-LABEL: @sse2_psrl_w_15_splat
521 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
522 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; psrl.w with 64 in count lane 0 (wider than an i16 lane): expected an all-zero vector.
526 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
527 ; CHECK-LABEL: @sse2_psrl_w_64
528 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
529 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psrl.d with an all-zero count vector: the expected output returns %v unchanged.
533 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
534 ; CHECK-LABEL: @sse2_psrl_d_0
535 ; CHECK-NEXT: ret <4 x i32> %v
536 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; psrl.d with count lanes <15, 0, 9999, 9999>: expected lshr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
540 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
541 ; CHECK-LABEL: @sse2_psrl_d_15
542 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
543 ; CHECK-NEXT: ret <4 x i32> %1
544 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; psrl.d with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that is out of range for an i32 lane - confirm.
548 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
549 ; CHECK-LABEL: @sse2_psrl_d_15_splat
550 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
551 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; psrl.d with 64 in count lane 0 (wider than an i32 lane): expected an all-zero vector.
555 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
556 ; CHECK-LABEL: @sse2_psrl_d_64
557 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
558 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; psrl.q with an all-zero count vector: the expected output returns %v unchanged.
562 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
563 ; CHECK-LABEL: @sse2_psrl_q_0
564 ; CHECK-NEXT: ret <2 x i64> %v
565 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; psrl.q with count lanes <15, 9999>: expected lshr by a splat of 15 (the 9999 lane does not show up in the result).
569 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
570 ; CHECK-LABEL: @sse2_psrl_q_15
571 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
572 ; CHECK-NEXT: ret <2 x i64> %1
573 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; psrl.q with 64 in count lane 0 (equal to the i64 lane width): expected an all-zero vector.
577 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
578 ; CHECK-LABEL: @sse2_psrl_q_64
579 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
580 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; 256-bit psrl.w (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
584 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
585 ; CHECK-LABEL: @avx2_psrl_w_0
586 ; CHECK-NEXT: ret <16 x i16> %v
587 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; 256-bit psrl.w with count lanes <15, 0, 0, 0, 9999 x4>: expected lshr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
591 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
592 ; CHECK-LABEL: @avx2_psrl_w_15
593 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
594 ; CHECK-NEXT: ret <16 x i16> %1
595 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psrl.w with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0-3 are read together as one 64-bit count
; that is out of range for an i16 lane - confirm.
599 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
600 ; CHECK-LABEL: @avx2_psrl_w_15_splat
601 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
602 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; 256-bit psrl.w with 64 in count lane 0 (wider than an i16 lane): expected an all-zero vector.
606 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
607 ; CHECK-LABEL: @avx2_psrl_w_64
608 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
609 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psrl.d (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
613 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
614 ; CHECK-LABEL: @avx2_psrl_d_0
615 ; CHECK-NEXT: ret <8 x i32> %v
616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; 256-bit psrl.d with count lanes <15, 0, 9999, 9999>: expected lshr by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
620 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
621 ; CHECK-LABEL: @avx2_psrl_d_15
622 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
623 ; CHECK-NEXT: ret <8 x i32> %1
624 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; 256-bit psrl.d with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that is out of range for an i32 lane - confirm.
628 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
629 ; CHECK-LABEL: @avx2_psrl_d_15_splat
630 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
631 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; 256-bit psrl.d with 64 in count lane 0 (wider than an i32 lane): expected an all-zero vector.
635 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
636 ; CHECK-LABEL: @avx2_psrl_d_64
637 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
638 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; 256-bit psrl.q (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
642 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
643 ; CHECK-LABEL: @avx2_psrl_q_0
644 ; CHECK-NEXT: ret <4 x i64> %v
645 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; 256-bit psrl.q with count lanes <15, 9999>: expected lshr by a splat of 15 (the 9999 lane does not show up in the result).
649 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
650 ; CHECK-LABEL: @avx2_psrl_q_15
651 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
652 ; CHECK-NEXT: ret <4 x i64> %1
653 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; 256-bit psrl.q with 64 in count lane 0 (equal to the i64 lane width): expected an all-zero vector.
657 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
658 ; CHECK-LABEL: @avx2_psrl_q_64
659 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
660 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
665 ; SHL - Constant Vector
; psll.w with an all-zero count vector: the expected output returns %v unchanged.
668 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
669 ; CHECK-LABEL: @sse2_psll_w_0
670 ; CHECK-NEXT: ret <8 x i16> %v
671 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; psll.w with count lanes <15, 0, 0, 0, 9999 x4>: expected shl by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
675 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
676 ; CHECK-LABEL: @sse2_psll_w_15
677 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
678 ; CHECK-NEXT: ret <8 x i16> %1
679 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psll.w with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0-3 are read together as one 64-bit count
; that is out of range for an i16 lane - confirm.
683 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
684 ; CHECK-LABEL: @sse2_psll_w_15_splat
685 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
686 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; psll.w with 64 in count lane 0 (wider than an i16 lane): expected an all-zero vector.
690 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
691 ; CHECK-LABEL: @sse2_psll_w_64
692 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
693 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psll.d with an all-zero count vector: the expected output returns %v unchanged.
697 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
698 ; CHECK-LABEL: @sse2_psll_d_0
699 ; CHECK-NEXT: ret <4 x i32> %v
700 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; psll.d with count lanes <15, 0, 9999, 9999>: expected shl by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
704 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
705 ; CHECK-LABEL: @sse2_psll_d_15
706 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
707 ; CHECK-NEXT: ret <4 x i32> %1
708 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; psll.d with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that is out of range for an i32 lane - confirm.
712 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
713 ; CHECK-LABEL: @sse2_psll_d_15_splat
714 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
715 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; psll.d with 64 in count lane 0 (wider than an i32 lane): expected an all-zero vector.
719 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
720 ; CHECK-LABEL: @sse2_psll_d_64
721 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
722 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; psll.q with an all-zero count vector: the expected output returns %v unchanged.
726 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
727 ; CHECK-LABEL: @sse2_psll_q_0
728 ; CHECK-NEXT: ret <2 x i64> %v
729 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; psll.q with count lanes <15, 9999>: expected shl by a splat of 15 (the 9999 lane does not show up in the result).
733 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
734 ; CHECK-LABEL: @sse2_psll_q_15
735 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
736 ; CHECK-NEXT: ret <2 x i64> %1
737 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; psll.q with 64 in count lane 0 (equal to the i64 lane width): expected an all-zero vector.
741 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
742 ; CHECK-LABEL: @sse2_psll_q_64
743 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
744 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; 256-bit psll.w (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
748 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
749 ; CHECK-LABEL: @avx2_psll_w_0
750 ; CHECK-NEXT: ret <16 x i16> %v
751 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; 256-bit psll.w with count lanes <15, 0, 0, 0, 9999 x4>: expected shl by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
755 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
756 ; CHECK-LABEL: @avx2_psll_w_15
757 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
758 ; CHECK-NEXT: ret <16 x i16> %1
759 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psll.w with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0-3 are read together as one 64-bit count
; that is out of range for an i16 lane - confirm.
763 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
764 ; CHECK-LABEL: @avx2_psll_w_15_splat
765 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
766 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; 256-bit psll.w with 64 in count lane 0 (wider than an i16 lane): expected an all-zero vector.
770 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
771 ; CHECK-LABEL: @avx2_psll_w_64
772 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
773 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; 256-bit psll.d (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
777 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
778 ; CHECK-LABEL: @avx2_psll_d_0
779 ; CHECK-NEXT: ret <8 x i32> %v
780 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; 256-bit psll.d with count lanes <15, 0, 9999, 9999>: expected shl by a splat of 15.
; NOTE(review): the 9999 lanes do not change the expected shift, which suggests
; only the low 64 bits of the count vector are read - confirm.
784 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
785 ; CHECK-LABEL: @avx2_psll_d_15
786 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
787 ; CHECK-NEXT: ret <8 x i32> %1
788 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; 256-bit psll.d with every count lane set to 15: expected an all-zero vector, not a shift by 15.
; NOTE(review): presumably lanes 0 and 1 are read together as one 64-bit count
; that is out of range for an i32 lane - confirm.
792 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
793 ; CHECK-LABEL: @avx2_psll_d_15_splat
794 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
795 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; 256-bit psll.d with 64 in count lane 0 (wider than an i32 lane): expected an all-zero vector.
799 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
800 ; CHECK-LABEL: @avx2_psll_d_64
801 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
802 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; 256-bit psll.q (128-bit count operand) with an all-zero count: the expected output returns %v unchanged.
806 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
807 ; CHECK-LABEL: @avx2_psll_q_0
808 ; CHECK-NEXT: ret <4 x i64> %v
809 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; 256-bit psll.q with count lanes <15, 9999>: expected shl by a splat of 15 (the 9999 lane does not show up in the result).
813 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
814 ; CHECK-LABEL: @avx2_psll_q_15
815 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
816 ; CHECK-NEXT: ret <4 x i64> %1
817 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; 256-bit psll.q with 64 in count lane 0 (equal to the i64 lane width): expected an all-zero vector.
821 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
822 ; CHECK-LABEL: @avx2_psll_q_64
823 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
824 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
829 ; Vector Demanded Bits
; Variable count for psra.w. The shuffle keeps count lanes 0-3 and overwrites
; lanes 4-7 with copies of them; the expected output drops the shuffle and
; passes %a to the intrinsic directly.
832 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
833 ; CHECK-LABEL: @sse2_psra_w_var
834 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
835 ; CHECK-NEXT: ret <8 x i16> %1
836 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
837 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; Same idea with the count built as <2 x i64> and bitcast to <8 x i16>. The
; shuffle keeps i64 lane 0 and copies it into lane 1; the expected output
; bitcasts %a directly and drops the shuffle.
841 define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
842 ; CHECK-LABEL: @sse2_psra_w_var_bc
843 ; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
844 ; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
845 ; CHECK-NEXT: ret <8 x i16> %2
846 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
847 %2 = bitcast <2 x i64> %1 to <8 x i16>
848 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
; Variable count for psra.d. The shuffle keeps count lanes 0-1 and overwrites
; lanes 2-3 with copies of them; the expected output drops the shuffle and
; passes %a to the intrinsic directly.
852 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
853 ; CHECK-LABEL: @sse2_psra_d_var
854 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
855 ; CHECK-NEXT: ret <4 x i32> %1
856 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
857 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
; Same idea with the count built as <8 x i16> and bitcast to <4 x i32>. The
; shuffle keeps i16 lanes 0-3 and copies them into lanes 4-7; the expected
; output bitcasts %a directly and drops the shuffle.
861 define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
862 ; CHECK-LABEL: @sse2_psra_d_var_bc
863 ; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
864 ; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
865 ; CHECK-NEXT: ret <4 x i32> %2
866 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
867 %2 = bitcast <8 x i16> %1 to <4 x i32>
868 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
; Variable count for 256-bit psra.w. The shuffle keeps count lanes 0-3 and
; overwrites lanes 4-7 with copies of them; the expected output drops the
; shuffle and passes %a to the intrinsic directly.
872 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
873 ; CHECK-LABEL: @avx2_psra_w_var
874 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
875 ; CHECK-NEXT: ret <16 x i16> %1
876 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
877 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
; Variable count for 256-bit psra.d. The shuffle keeps count lanes 0-1 and
; overwrites lanes 2-3 with copies of them; the expected output drops the
; shuffle and passes %a to the intrinsic directly.
881 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
882 ; CHECK-LABEL: @avx2_psra_d_var
883 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
884 ; CHECK-NEXT: ret <8 x i32> %1
885 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
886 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
; Variable count for psrl.w. The shuffle keeps count lanes 0-3 and overwrites
; lanes 4-7 with copies of them; the expected output drops the shuffle and
; passes %a to the intrinsic directly.
890 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
891 ; CHECK-LABEL: @sse2_psrl_w_var
892 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
893 ; CHECK-NEXT: ret <8 x i16> %1
894 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
895 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
; SSE2 psrl.d with a shuffled count operand. The mask <0,1,0,1> keeps
; elements 0-1 of %a and only rewrites elements 2-3.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
899 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
900 ; CHECK-LABEL: @sse2_psrl_d_var
901 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
902 ; CHECK-NEXT: ret <4 x i32> %1
903 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
904 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
; SSE2 psrl.q with a shuffled count operand. The mask <0,0> keeps element 0
; of %a and replaces element 1 with a copy of element 0.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
908 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
909 ; CHECK-LABEL: @sse2_psrl_q_var
910 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
911 ; CHECK-NEXT: ret <2 x i64> %1
912 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
913 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
; AVX2 psrl.w with a shuffled count operand. The mask <0,1,2,3,0,1,2,3>
; keeps the low four i16 lanes of %a unchanged.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
917 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
918 ; CHECK-LABEL: @avx2_psrl_w_var
919 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
920 ; CHECK-NEXT: ret <16 x i16> %1
921 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
922 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; AVX2 psrl.w where the count arrives as <16 x i8>, is shuffled, and is then
; bitcast to <8 x i16>. The mask repeats byte lanes 0-7, so the low 64 bits
; of %a are preserved and only the upper half is rewritten.
; Expected output: the shuffle disappears and %a is bitcast directly.
926 define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
927 ; CHECK-LABEL: @avx2_psrl_w_var_bc
928 ; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
929 ; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
930 ; CHECK-NEXT: ret <16 x i16> %2
931 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
932 %2 = bitcast <16 x i8> %1 to <8 x i16>
933 %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
; AVX2 psrl.d with a shuffled count operand. The mask <0,1,0,1> keeps
; elements 0-1 of %a and only rewrites elements 2-3.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
937 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
938 ; CHECK-LABEL: @avx2_psrl_d_var
939 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
940 ; CHECK-NEXT: ret <8 x i32> %1
941 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
942 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; AVX2 psrl.d where the count arrives as <2 x i64>, is shuffled with mask
; <0,0> (element 0 kept, element 1 overwritten), and is then bitcast to
; <4 x i32>.
; Expected output: the shuffle disappears and %a is bitcast directly.
946 define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
947 ; CHECK-LABEL: @avx2_psrl_d_var_bc
948 ; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
949 ; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
950 ; CHECK-NEXT: ret <8 x i32> %2
951 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
952 %2 = bitcast <2 x i64> %1 to <4 x i32>
953 %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
; AVX2 psrl.q with a shuffled count operand. The mask <0,0> keeps element 0
; of %a and replaces element 1 with a copy of element 0.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
957 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
958 ; CHECK-LABEL: @avx2_psrl_q_var
959 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
960 ; CHECK-NEXT: ret <4 x i64> %1
961 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
962 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
; SSE2 psll.w with a shuffled count operand. The mask <0,1,2,3,0,1,2,3>
; keeps the low four i16 lanes (64 bits) of %a unchanged.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
966 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
967 ; CHECK-LABEL: @sse2_psll_w_var
968 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
969 ; CHECK-NEXT: ret <8 x i16> %1
970 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
971 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
; SSE2 psll.d with a shuffled count operand. The mask <0,1,0,1> keeps
; elements 0-1 of %a and only rewrites elements 2-3.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
975 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
976 ; CHECK-LABEL: @sse2_psll_d_var
977 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
978 ; CHECK-NEXT: ret <4 x i32> %1
979 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
980 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
; SSE2 psll.q with a shuffled count operand. The mask <0,0> keeps element 0
; of %a and replaces element 1 with a copy of element 0.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
984 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
985 ; CHECK-LABEL: @sse2_psll_q_var
986 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
987 ; CHECK-NEXT: ret <2 x i64> %1
988 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
989 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
; AVX2 psll.w with a shuffled count operand. The mask <0,1,2,3,0,1,2,3>
; keeps the low four i16 lanes of %a unchanged.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
993 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
994 ; CHECK-LABEL: @avx2_psll_w_var
995 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
996 ; CHECK-NEXT: ret <16 x i16> %1
997 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
998 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
; AVX2 psll.d with a shuffled count operand. The mask <0,1,0,1> keeps
; elements 0-1 of %a and only rewrites elements 2-3.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
1002 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
1003 ; CHECK-LABEL: @avx2_psll_d_var
1004 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
1005 ; CHECK-NEXT: ret <8 x i32> %1
1006 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1007 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
; AVX2 psll.q with a shuffled count operand. The mask <0,0> keeps element 0
; of %a and replaces element 1 with a copy of element 0.
; Expected output: the intrinsic takes %a directly; the shuffle is removed.
1011 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
1012 ; CHECK-LABEL: @avx2_psll_q_var
1013 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
1014 ; CHECK-NEXT: ret <4 x i64> %1
1015 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
1016 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
; Chain of three arithmetic right shifts that all shift by zero:
; psrai.w by 0, psra.w by a count vector whose low four i16 lanes are 0
; (the 7 sits in lane 4, outside the low 64 bits), then psrai.w by 0.
; Expected output: the whole chain folds away and %A is returned unchanged.
1024 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
1025 ; CHECK-LABEL: @test_sse2_psra_w_0
1026 ; CHECK-NEXT: ret <8 x i16> %A
1027 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
1028 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1029 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
; Constant-folds a chain of arithmetic right shifts on a constant vector.
; The i64 constant 1152956690052710400 is 0x1000200040008000, i.e. the i16
; lanes 0x8000, 0x4000, 0x2000, 0x1000 repeated. Shifts applied: 3 (psrai.w),
; 3 (psra.w, low 64 bits of the count vector), 2 (psrai.w) = 8 in total.
; Expected output: a single constant return; 0x8000 >> 8 sign-extends to -128.
1033 define <8 x i16> @test_sse2_psra_w_8() {
1034 ; CHECK-LABEL: @test_sse2_psra_w_8
1035 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
1036 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
1037 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
1038 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1039 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
; Chain of three arithmetic right shifts that all shift by zero:
; psrai.d by 0, psra.d by a count vector whose low two i32 lanes are 0
; (the 7 sits in lane 2, outside the low 64 bits), then psrai.d by 0.
; Each call consumes the previous result, so the psra.d in the middle is
; actually exercised rather than left as a dead instruction.
; Expected output: the whole chain folds away and %A is returned unchanged.
1043 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
1044 ; CHECK-LABEL: @test_sse2_psra_d_0
1045 ; CHECK-NEXT: ret <4 x i32> %A
1046 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
1047 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1048 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
; Constant-folds a chain of arithmetic right shifts on a constant vector.
; The i64 constant 1152956690052710400 is 0x1000200040008000, i.e. the i32
; lanes 0x40008000, 0x10002000 repeated. Shifts applied: 3 (psrai.d),
; 3 (psra.d, low 64 bits of the count vector), 2 (psrai.d) = 8 in total.
; Expected output: a single constant return.
; NOTE(review): unlike its siblings this function has no "test_" name prefix.
1052 define <4 x i32> @sse2_psra_d_8() {
1053 ; CHECK-LABEL: @sse2_psra_d_8
1054 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1055 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
1056 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
1057 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1058 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
; AVX2 variant of the zero-shift chain: psrai.w by 0, psra.w by a count
; vector whose low four i16 lanes are 0, then psrai.w by 0.
; Expected output: the whole chain folds away and %A is returned unchanged.
1062 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
1063 ; CHECK-LABEL: @test_avx2_psra_w_0
1064 ; CHECK-NEXT: ret <16 x i16> %A
1065 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
1066 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1067 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
; AVX2 variant of the constant arithmetic-shift chain. The i16 lanes are
; 0x8000, 0x4000, 0x2000, 0x1000 repeated; the total shift is 3 + 3 + 2 = 8.
; Expected output: a single constant return.
; NOTE(review): the %A parameter is never used in the visible instructions.
1071 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
1072 ; CHECK-LABEL: @test_avx2_psra_w_8
1073 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
1074 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
1075 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
1076 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1077 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
; AVX2 variant of the zero-shift chain: psrai.d by 0, psra.d by a count
; vector whose low two i32 lanes are 0, then psrai.d by 0.
; Expected output: the whole chain folds away and %A is returned unchanged.
1081 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
1082 ; CHECK-LABEL: @test_avx2_psra_d_0
1083 ; CHECK-NEXT: ret <8 x i32> %A
1084 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
1085 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1086 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
; AVX2 variant of the constant arithmetic-shift chain. The i32 lanes are
; 0x40008000, 0x10002000 repeated; the total shift is 3 + 3 + 2 = 8.
; Expected output: a single constant return.
1090 define <8 x i32> @test_avx2_psra_d_8() {
1091 ; CHECK-LABEL: @test_avx2_psra_d_8
1092 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1093 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
1094 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
1095 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1096 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
; Constant-folds six chained SSE2 left shifts, each by 1.
; %S is the constant 1 (a no-op bitcast gives it a name); %3 is the count
; vector <i64 1, i64 0>. The chain runs psll.w, psll.d, psll.q with the
; vector count and then pslli.w, pslli.d, pslli.q with the i32 count, so
; every i16 lane of <1..8> ends up multiplied by 64.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1100 define <2 x i64> @test_sse2_1() {
1101 %S = bitcast i32 1 to i32
1102 %1 = zext i32 %S to i64
1103 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1104 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1105 %4 = bitcast <2 x i64> %3 to <8 x i16>
1106 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1107 %6 = bitcast <8 x i16> %5 to <4 x i32>
1108 %7 = bitcast <2 x i64> %3 to <4 x i32>
1109 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1110 %9 = bitcast <4 x i32> %8 to <2 x i64>
1111 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1112 %11 = bitcast <2 x i64> %10 to <8 x i16>
1113 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1114 %13 = bitcast <8 x i16> %12 to <4 x i32>
1115 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1116 %15 = bitcast <4 x i32> %14 to <2 x i64>
1117 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1119 ; CHECK-LABEL: @test_sse2_1
1120 ; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
; Constant-folds six chained AVX2 left shifts, each by 1.
; %S is the constant 1; %3 is the count vector <i64 1, i64 0>. The input
; vector holds 1, 2, 3, 4 in the lowest i16 lane of each 64-bit group, so
; after psll.w/d/q and pslli.w/d/q the i64 lanes are 64, 128, 192, 256.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1123 define <4 x i64> @test_avx2_1() {
1124 %S = bitcast i32 1 to i32
1125 %1 = zext i32 %S to i64
1126 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1127 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1128 %4 = bitcast <2 x i64> %3 to <8 x i16>
1129 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1130 %6 = bitcast <16 x i16> %5 to <8 x i32>
1131 %7 = bitcast <2 x i64> %3 to <4 x i32>
1132 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1133 %9 = bitcast <8 x i32> %8 to <4 x i64>
1134 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1135 %11 = bitcast <4 x i64> %10 to <16 x i16>
1136 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1137 %13 = bitcast <16 x i16> %12 to <8 x i32>
1138 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1139 %15 = bitcast <8 x i32> %14 to <4 x i64>
1140 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1142 ; CHECK-LABEL: @test_avx2_1
1143 ; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
; Constant-folds six chained SSE2 left shifts with an out-of-range count.
; %S is the constant 128, which exceeds every element width used here
; (16, 32 and 64 bits); %3 is the count vector <i64 128, i64 0>.
; The test expects the whole chain to fold to an all-zero vector.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1146 define <2 x i64> @test_sse2_0() {
1147 %S = bitcast i32 128 to i32
1148 %1 = zext i32 %S to i64
1149 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1150 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1151 %4 = bitcast <2 x i64> %3 to <8 x i16>
1152 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1153 %6 = bitcast <8 x i16> %5 to <4 x i32>
1154 %7 = bitcast <2 x i64> %3 to <4 x i32>
1155 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1156 %9 = bitcast <4 x i32> %8 to <2 x i64>
1157 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1158 %11 = bitcast <2 x i64> %10 to <8 x i16>
1159 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1160 %13 = bitcast <8 x i16> %12 to <4 x i32>
1161 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1162 %15 = bitcast <4 x i32> %14 to <2 x i64>
1163 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1165 ; CHECK-LABEL: @test_sse2_0
1166 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
; Constant-folds six chained AVX2 left shifts with an out-of-range count.
; %S is the constant 128, which exceeds every element width used here
; (16, 32 and 64 bits); %3 is the count vector <i64 128, i64 0>.
; The test expects the whole chain to fold to an all-zero vector.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1169 define <4 x i64> @test_avx2_0() {
1170 %S = bitcast i32 128 to i32
1171 %1 = zext i32 %S to i64
1172 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1173 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1174 %4 = bitcast <2 x i64> %3 to <8 x i16>
1175 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1176 %6 = bitcast <16 x i16> %5 to <8 x i32>
1177 %7 = bitcast <2 x i64> %3 to <4 x i32>
1178 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1179 %9 = bitcast <8 x i32> %8 to <4 x i64>
1180 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1181 %11 = bitcast <4 x i64> %10 to <16 x i16>
1182 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1183 %13 = bitcast <16 x i16> %12 to <8 x i32>
1184 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1185 %15 = bitcast <8 x i32> %14 to <4 x i64>
1186 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1188 ; CHECK-LABEL: @test_avx2_0
1189 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
; Constant-folds six chained SSE2 logical right shifts, each by 1.
; %S is the constant 1; %3 is the count vector <i64 1, i64 0>. The chain
; runs psrl.w, psrl.d, psrl.q with the vector count and then psrli.w,
; psrli.d, psrli.q with the i32 count on the input <16, 32, ..., 2048>.
; The test expects the chain to fold to a single constant return.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1191 define <2 x i64> @test_sse2_psrl_1() {
1192 %S = bitcast i32 1 to i32
1193 %1 = zext i32 %S to i64
1194 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1195 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1196 %4 = bitcast <2 x i64> %3 to <8 x i16>
1197 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
1198 %6 = bitcast <8 x i16> %5 to <4 x i32>
1199 %7 = bitcast <2 x i64> %3 to <4 x i32>
1200 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1201 %9 = bitcast <4 x i32> %8 to <2 x i64>
1202 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1203 %11 = bitcast <2 x i64> %10 to <8 x i16>
1204 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1205 %13 = bitcast <8 x i16> %12 to <4 x i32>
1206 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1207 %15 = bitcast <4 x i32> %14 to <2 x i64>
1208 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1210 ; CHECK-LABEL: @test_sse2_psrl_1
1211 ; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
; Constant-folds six chained AVX2 logical right shifts, each by 1.
; %S is the constant 1; %3 is the count vector <i64 1, i64 0>. The input
; holds 1024, 2048, 4096, 8192 in the lowest i16 lane of each 64-bit group,
; so after psrl.w/d/q and psrli.w/d/q the i64 lanes are 16, 32, 64, 128.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1214 define <4 x i64> @test_avx2_psrl_1() {
1215 %S = bitcast i32 1 to i32
1216 %1 = zext i32 %S to i64
1217 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1218 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1219 %4 = bitcast <2 x i64> %3 to <8 x i16>
1220 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1221 %6 = bitcast <16 x i16> %5 to <8 x i32>
1222 %7 = bitcast <2 x i64> %3 to <4 x i32>
1223 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1224 %9 = bitcast <8 x i32> %8 to <4 x i64>
1225 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1226 %11 = bitcast <4 x i64> %10 to <16 x i16>
1227 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1228 %13 = bitcast <16 x i16> %12 to <8 x i32>
1229 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1230 %15 = bitcast <8 x i32> %14 to <4 x i64>
1231 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1233 ; CHECK-LABEL: @test_avx2_psrl_1
1234 ; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
; Constant-folds six chained SSE2 logical right shifts with an out-of-range
; count. %S is the constant 128, which exceeds every element width used here
; (16, 32 and 64 bits); %3 is the count vector <i64 128, i64 0>.
; The test expects the whole chain to fold to an all-zero vector.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1237 define <2 x i64> @test_sse2_psrl_0() {
1238 %S = bitcast i32 128 to i32
1239 %1 = zext i32 %S to i64
1240 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1241 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1242 %4 = bitcast <2 x i64> %3 to <8 x i16>
1243 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
1244 %6 = bitcast <8 x i16> %5 to <4 x i32>
1245 %7 = bitcast <2 x i64> %3 to <4 x i32>
1246 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1247 %9 = bitcast <4 x i32> %8 to <2 x i64>
1248 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1249 %11 = bitcast <2 x i64> %10 to <8 x i16>
1250 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1251 %13 = bitcast <8 x i16> %12 to <4 x i32>
1252 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1253 %15 = bitcast <4 x i32> %14 to <2 x i64>
1254 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1256 ; CHECK-LABEL: @test_sse2_psrl_0
1257 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
; Constant-folds six chained AVX2 logical right shifts with an out-of-range
; count. %S is the constant 128, which exceeds every element width used here
; (16, 32 and 64 bits); %3 is the count vector <i64 128, i64 0>.
; The test expects the whole chain to fold to an all-zero vector.
; The expectation lines after the body are anchored to this function with a
; label directive and a next-line match, like the other tests in this file.
1260 define <4 x i64> @test_avx2_psrl_0() {
1261 %S = bitcast i32 128 to i32
1262 %1 = zext i32 %S to i64
1263 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1264 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1265 %4 = bitcast <2 x i64> %3 to <8 x i16>
1266 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1267 %6 = bitcast <16 x i16> %5 to <8 x i32>
1268 %7 = bitcast <2 x i64> %3 to <4 x i32>
1269 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1270 %9 = bitcast <8 x i32> %8 to <4 x i64>
1271 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1272 %11 = bitcast <4 x i64> %10 to <16 x i16>
1273 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1274 %13 = bitcast <16 x i16> %12 to <8 x i32>
1275 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1276 %15 = bitcast <8 x i32> %14 to <4 x i64>
1277 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1279 ; CHECK-LABEL: @test_avx2_psrl_0
1280 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
; Declarations of the x86 shift intrinsics called by the tests above.
; The "i" forms (pslli/psrli/psrai) take the count as an i32; the other forms
; take it as a 128-bit vector. All share attribute group #1.

; Shift left: psll / pslli.
1283 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
1284 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
1285 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
1286 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
1287 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
1288 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
1289 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
1290 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
1291 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
1292 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
1293 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
1294 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
; Shift right: psrl / psrli.
1296 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
1297 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
1298 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
1299 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
1300 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
1301 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
1302 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
1303 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
1304 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
1305 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
1306 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
1307 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
; Shift right, psra / psrai (only .w and .d variants are declared).
1309 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
1310 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
1311 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
1312 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
1313 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
1314 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
1315 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
1316 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
; Attribute group applied to every declaration above.
1318 attributes #1 = { nounwind readnone }