1 ; RUN: opt < %s -instcombine -S | FileCheck %s
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
8 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) nounwind readnone uwtable {
9 ; CHECK-LABEL: @sse2_psrai_w_0
10 ; CHECK-NEXT: ret <8 x i16> %v
11 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic ashr of every lane by 15.
15 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) nounwind readnone uwtable {
16 ; CHECK-LABEL: @sse2_psrai_w_15
17 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
18 ; CHECK-NEXT: ret <8 x i16> %1
19 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an
; ashr by 15 (lane width - 1) rather than by 64.
23 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) nounwind readnone uwtable {
24 ; CHECK-LABEL: @sse2_psrai_w_64
25 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
26 ; CHECK-NEXT: ret <8 x i16> %1
27 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
31 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) nounwind readnone uwtable {
32 ; CHECK-LABEL: @sse2_psrai_d_0
33 ; CHECK-NEXT: ret <4 x i32> %v
34 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic ashr of every lane by 15.
38 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) nounwind readnone uwtable {
39 ; CHECK-LABEL: @sse2_psrai_d_15
40 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
41 ; CHECK-NEXT: ret <4 x i32> %1
42 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an
; ashr by 31 (lane width - 1) rather than by 64.
46 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) nounwind readnone uwtable {
47 ; CHECK-LABEL: @sse2_psrai_d_64
48 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
49 ; CHECK-NEXT: ret <4 x i32> %1
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
54 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) nounwind readnone uwtable {
55 ; CHECK-LABEL: @avx2_psrai_w_0
56 ; CHECK-NEXT: ret <16 x i16> %v
57 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic ashr of all sixteen lanes by 15.
61 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) nounwind readnone uwtable {
62 ; CHECK-LABEL: @avx2_psrai_w_15
63 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
64 ; CHECK-NEXT: ret <16 x i16> %1
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an
; ashr by 15 (lane width - 1) rather than by 64.
69 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) nounwind readnone uwtable {
70 ; CHECK-LABEL: @avx2_psrai_w_64
71 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
72 ; CHECK-NEXT: ret <16 x i16> %1
73 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
77 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) nounwind readnone uwtable {
78 ; CHECK-LABEL: @avx2_psrai_d_0
79 ; CHECK-NEXT: ret <8 x i32> %v
80 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic ashr of all eight lanes by 15.
84 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) nounwind readnone uwtable {
85 ; CHECK-LABEL: @avx2_psrai_d_15
86 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
87 ; CHECK-NEXT: ret <8 x i32> %1
88 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an
; ashr by 31 (lane width - 1) rather than by 64.
92 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) nounwind readnone uwtable {
93 ; CHECK-LABEL: @avx2_psrai_d_64
94 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
95 ; CHECK-NEXT: ret <8 x i32> %1
96 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
104 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
105 ; CHECK-LABEL: @sse2_psrli_w_0
106 ; CHECK-NEXT: ret <8 x i16> %v
107 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of every lane by 15.
111 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
112 ; CHECK-LABEL: @sse2_psrli_w_15
113 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
114 ; CHECK-NEXT: ret <8 x i16> %1
115 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
119 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
120 ; CHECK-LABEL: @sse2_psrli_w_64
121 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
122 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
126 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
127 ; CHECK-LABEL: @sse2_psrli_d_0
128 ; CHECK-NEXT: ret <4 x i32> %v
129 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of every lane by 15.
133 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
134 ; CHECK-LABEL: @sse2_psrli_d_15
135 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
136 ; CHECK-NEXT: ret <4 x i32> %1
137 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
141 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
142 ; CHECK-LABEL: @sse2_psrli_d_64
143 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
144 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
148 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
149 ; CHECK-LABEL: @sse2_psrli_q_0
150 ; CHECK-NEXT: ret <2 x i64> %v
151 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of both lanes by 15.
155 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
156 ; CHECK-LABEL: @sse2_psrli_q_15
157 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
158 ; CHECK-NEXT: ret <2 x i64> %1
159 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
; Immediate count 64 equals the 64-bit lane width, so it is already out of range:
; the expected output is an all-zero vector.
163 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
164 ; CHECK-LABEL: @sse2_psrli_q_64
165 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
166 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
170 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
171 ; CHECK-LABEL: @avx2_psrli_w_0
172 ; CHECK-NEXT: ret <16 x i16> %v
173 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of all sixteen lanes by 15.
177 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
178 ; CHECK-LABEL: @avx2_psrli_w_15
179 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
180 ; CHECK-NEXT: ret <16 x i16> %1
181 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
185 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
186 ; CHECK-LABEL: @avx2_psrli_w_64
187 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
188 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
192 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
193 ; CHECK-LABEL: @avx2_psrli_d_0
194 ; CHECK-NEXT: ret <8 x i32> %v
195 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of all eight lanes by 15.
199 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
200 ; CHECK-LABEL: @avx2_psrli_d_15
201 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
202 ; CHECK-NEXT: ret <8 x i32> %1
203 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
207 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
208 ; CHECK-LABEL: @avx2_psrli_d_64
209 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
210 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
214 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
215 ; CHECK-LABEL: @avx2_psrli_q_0
216 ; CHECK-NEXT: ret <4 x i64> %v
217 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
; Immediate count 15: the expected output is a generic lshr of all four lanes by 15.
221 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
222 ; CHECK-LABEL: @avx2_psrli_q_15
223 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
224 ; CHECK-NEXT: ret <4 x i64> %1
225 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
; Immediate count 64 equals the 64-bit lane width, so it is already out of range:
; the expected output is an all-zero vector.
229 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
230 ; CHECK-LABEL: @avx2_psrli_q_64
231 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
232 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
240 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
241 ; CHECK-LABEL: @sse2_pslli_w_0
242 ; CHECK-NEXT: ret <8 x i16> %v
243 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of every lane by 15.
247 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
248 ; CHECK-LABEL: @sse2_pslli_w_15
249 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
250 ; CHECK-NEXT: ret <8 x i16> %1
251 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
255 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
256 ; CHECK-LABEL: @sse2_pslli_w_64
257 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
258 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
262 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
263 ; CHECK-LABEL: @sse2_pslli_d_0
264 ; CHECK-NEXT: ret <4 x i32> %v
265 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of every lane by 15.
269 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
270 ; CHECK-LABEL: @sse2_pslli_d_15
271 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
272 ; CHECK-NEXT: ret <4 x i32> %1
273 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
277 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
278 ; CHECK-LABEL: @sse2_pslli_d_64
279 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
280 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
284 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
285 ; CHECK-LABEL: @sse2_pslli_q_0
286 ; CHECK-NEXT: ret <2 x i64> %v
287 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of both lanes by 15.
291 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
292 ; CHECK-LABEL: @sse2_pslli_q_15
293 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
294 ; CHECK-NEXT: ret <2 x i64> %1
295 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
; Immediate count 64 equals the 64-bit lane width, so it is already out of range:
; the expected output is an all-zero vector.
299 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
300 ; CHECK-LABEL: @sse2_pslli_q_64
301 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
302 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
306 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
307 ; CHECK-LABEL: @avx2_pslli_w_0
308 ; CHECK-NEXT: ret <16 x i16> %v
309 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of all sixteen lanes by 15.
313 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
314 ; CHECK-LABEL: @avx2_pslli_w_15
315 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
316 ; CHECK-NEXT: ret <16 x i16> %1
317 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
; Immediate count 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
321 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
322 ; CHECK-LABEL: @avx2_pslli_w_64
323 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
324 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
328 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
329 ; CHECK-LABEL: @avx2_pslli_d_0
330 ; CHECK-NEXT: ret <8 x i32> %v
331 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of all eight lanes by 15.
335 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
336 ; CHECK-LABEL: @avx2_pslli_d_15
337 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
338 ; CHECK-NEXT: ret <8 x i32> %1
339 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
; Immediate count 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
343 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
344 ; CHECK-LABEL: @avx2_pslli_d_64
345 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
346 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
; Immediate count 0: the expected output returns %v unchanged, with the intrinsic call gone.
350 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
351 ; CHECK-LABEL: @avx2_pslli_q_0
352 ; CHECK-NEXT: ret <4 x i64> %v
353 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
; Immediate count 15: the expected output is a generic shl of all four lanes by 15.
357 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
358 ; CHECK-LABEL: @avx2_pslli_q_15
359 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
360 ; CHECK-NEXT: ret <4 x i64> %1
361 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
; Immediate count 64 equals the 64-bit lane width, so it is already out of range:
; the expected output is an all-zero vector.
365 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
366 ; CHECK-LABEL: @avx2_pslli_q_64
367 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
368 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
373 ; ASHR - Constant Vector
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
376 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) nounwind readnone uwtable {
377 ; CHECK-LABEL: @sse2_psra_w_0
378 ; CHECK-NEXT: ret <8 x i16> %v
379 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic ashr by 15, so the 9999 elements do not
; influence the shift amount.
383 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) nounwind readnone uwtable {
384 ; CHECK-LABEL: @sse2_psra_w_15
385 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
386 ; CHECK-NEXT: ret <8 x i16> %1
387 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an ashr of every lane by 15.
; NOTE(review): 15 is also the amount expected for an out-of-range count on 16-bit
; lanes (see the count-64 case), so this presumably exercises the case where
; elements 1-3 are non-zero - confirm against the intrinsic's definition.
391 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
392 ; CHECK-LABEL: @sse2_psra_w_15_splat
393 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
394 ; CHECK-NEXT: ret <8 x i16> %1
395 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an ashr by 15
; (lane width - 1).
399 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) nounwind readnone uwtable {
400 ; CHECK-LABEL: @sse2_psra_w_64
401 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
402 ; CHECK-NEXT: ret <8 x i16> %1
403 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
407 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) nounwind readnone uwtable {
408 ; CHECK-LABEL: @sse2_psra_d_0
409 ; CHECK-NEXT: ret <4 x i32> %v
410 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic ashr by 15, so the 9999 elements do not
; influence the shift amount.
414 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) nounwind readnone uwtable {
415 ; CHECK-LABEL: @sse2_psra_d_15
416 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
417 ; CHECK-NEXT: ret <4 x i32> %1
418 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an ashr by 31, not by 15.
; NOTE(review): presumably elements 0-1 are read together as one 64-bit count,
; which is then out of range for 32-bit lanes - confirm against the intrinsic's
; definition.
422 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
423 ; CHECK-LABEL: @sse2_psra_d_15_splat
424 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
425 ; CHECK-NEXT: ret <4 x i32> %1
426 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an ashr by 31
; (lane width - 1).
430 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) nounwind readnone uwtable {
431 ; CHECK-LABEL: @sse2_psra_d_64
432 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
433 ; CHECK-NEXT: ret <4 x i32> %1
434 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
438 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) nounwind readnone uwtable {
439 ; CHECK-LABEL: @avx2_psra_w_0
440 ; CHECK-NEXT: ret <16 x i16> %v
441 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic ashr by 15, so the 9999 elements do not
; influence the shift amount.
445 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) nounwind readnone uwtable {
446 ; CHECK-LABEL: @avx2_psra_w_15
447 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
448 ; CHECK-NEXT: ret <16 x i16> %1
449 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an ashr of every lane by 15.
; NOTE(review): 15 is also the amount expected for an out-of-range count on 16-bit
; lanes (see the count-64 case), so this presumably exercises the case where
; elements 1-3 are non-zero - confirm against the intrinsic's definition.
453 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
454 ; CHECK-LABEL: @avx2_psra_w_15_splat
455 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
456 ; CHECK-NEXT: ret <16 x i16> %1
457 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an ashr by 15
; (lane width - 1).
461 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) nounwind readnone uwtable {
462 ; CHECK-LABEL: @avx2_psra_w_64
463 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
464 ; CHECK-NEXT: ret <16 x i16> %1
465 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
469 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) nounwind readnone uwtable {
470 ; CHECK-LABEL: @avx2_psra_d_0
471 ; CHECK-NEXT: ret <8 x i32> %v
472 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic ashr by 15, so the 9999 elements do not
; influence the shift amount.
476 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) nounwind readnone uwtable {
477 ; CHECK-LABEL: @avx2_psra_d_15
478 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
479 ; CHECK-NEXT: ret <8 x i32> %1
480 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an ashr by 31, not by 15.
; NOTE(review): presumably elements 0-1 are read together as one 64-bit count,
; which is then out of range for 32-bit lanes - confirm against the intrinsic's
; definition.
484 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
485 ; CHECK-LABEL: @avx2_psra_d_15_splat
486 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
487 ; CHECK-NEXT: ret <8 x i32> %1
488 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an ashr by 31
; (lane width - 1).
492 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) nounwind readnone uwtable {
493 ; CHECK-LABEL: @avx2_psra_d_64
494 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
495 ; CHECK-NEXT: ret <8 x i32> %1
496 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
501 ; LSHR - Constant Vector
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
504 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
505 ; CHECK-LABEL: @sse2_psrl_w_0
506 ; CHECK-NEXT: ret <8 x i16> %v
507 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic lshr by 15, so the 9999 elements do not
; influence the shift amount.
511 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
512 ; CHECK-LABEL: @sse2_psrl_w_15
513 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
514 ; CHECK-NEXT: ret <8 x i16> %1
515 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-3 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
519 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
520 ; CHECK-LABEL: @sse2_psrl_w_15_splat
521 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
522 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
526 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
527 ; CHECK-LABEL: @sse2_psrl_w_64
528 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
529 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
533 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
534 ; CHECK-LABEL: @sse2_psrl_d_0
535 ; CHECK-NEXT: ret <4 x i32> %v
536 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic lshr by 15, so the 9999 elements do not
; influence the shift amount.
540 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
541 ; CHECK-LABEL: @sse2_psrl_d_15
542 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
543 ; CHECK-NEXT: ret <4 x i32> %1
544 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-1 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
548 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
549 ; CHECK-LABEL: @sse2_psrl_d_15_splat
550 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
551 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
555 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
556 ; CHECK-LABEL: @sse2_psrl_d_64
557 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
558 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
562 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
563 ; CHECK-LABEL: @sse2_psrl_q_0
564 ; CHECK-NEXT: ret <2 x i64> %v
565 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; Count vector holds 15 in element 0 and 9999 in element 1. The expected output
; is a generic lshr by 15, so element 1 does not influence the shift amount.
569 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
570 ; CHECK-LABEL: @sse2_psrl_q_15
571 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
572 ; CHECK-NEXT: ret <2 x i64> %1
573 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Count vector holds 64 in element 0 and 9999 in element 1. 64 equals the 64-bit
; lane width, so it is out of range: the expected output is an all-zero vector.
577 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
578 ; CHECK-LABEL: @sse2_psrl_q_64
579 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
580 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
584 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
585 ; CHECK-LABEL: @avx2_psrl_w_0
586 ; CHECK-NEXT: ret <16 x i16> %v
587 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic lshr by 15, so the 9999 elements do not
; influence the shift amount.
591 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
592 ; CHECK-LABEL: @avx2_psrl_w_15
593 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
594 ; CHECK-NEXT: ret <16 x i16> %1
595 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-3 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
599 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
600 ; CHECK-LABEL: @avx2_psrl_w_15_splat
601 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
602 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
606 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
607 ; CHECK-LABEL: @avx2_psrl_w_64
608 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
609 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
613 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
614 ; CHECK-LABEL: @avx2_psrl_d_0
615 ; CHECK-NEXT: ret <8 x i32> %v
616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic lshr by 15, so the 9999 elements do not
; influence the shift amount.
620 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
621 ; CHECK-LABEL: @avx2_psrl_d_15
622 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
623 ; CHECK-NEXT: ret <8 x i32> %1
624 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-1 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
628 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
629 ; CHECK-LABEL: @avx2_psrl_d_15_splat
630 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
631 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
635 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
636 ; CHECK-LABEL: @avx2_psrl_d_64
637 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
638 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
642 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
643 ; CHECK-LABEL: @avx2_psrl_q_0
644 ; CHECK-NEXT: ret <4 x i64> %v
645 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; Count vector holds 15 in element 0 and 9999 in element 1. The expected output
; is a generic lshr by 15, so element 1 does not influence the shift amount.
649 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
650 ; CHECK-LABEL: @avx2_psrl_q_15
651 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
652 ; CHECK-NEXT: ret <4 x i64> %1
653 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Count vector holds 64 in element 0 and 9999 in element 1. 64 equals the 64-bit
; lane width, so it is out of range: the expected output is an all-zero vector.
657 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
658 ; CHECK-LABEL: @avx2_psrl_q_64
659 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
660 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
665 ; SHL - Constant Vector
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
668 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
669 ; CHECK-LABEL: @sse2_psll_w_0
670 ; CHECK-NEXT: ret <8 x i16> %v
671 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic shl by 15, so the 9999 elements do not
; influence the shift amount.
675 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
676 ; CHECK-LABEL: @sse2_psll_w_15
677 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
678 ; CHECK-NEXT: ret <8 x i16> %1
679 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-3 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
683 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
684 ; CHECK-LABEL: @sse2_psll_w_15_splat
685 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
686 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
690 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
691 ; CHECK-LABEL: @sse2_psll_w_64
692 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
693 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
697 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
698 ; CHECK-LABEL: @sse2_psll_d_0
699 ; CHECK-NEXT: ret <4 x i32> %v
700 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic shl by 15, so the 9999 elements do not
; influence the shift amount.
704 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
705 ; CHECK-LABEL: @sse2_psll_d_15
706 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
707 ; CHECK-NEXT: ret <4 x i32> %1
708 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-1 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
712 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
713 ; CHECK-LABEL: @sse2_psll_d_15_splat
714 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
715 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
719 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
720 ; CHECK-LABEL: @sse2_psll_d_64
721 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
722 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
726 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
727 ; CHECK-LABEL: @sse2_psll_q_0
728 ; CHECK-NEXT: ret <2 x i64> %v
729 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; Count vector holds 15 in element 0 and 9999 in element 1. The expected output
; is a generic shl by 15, so element 1 does not influence the shift amount.
733 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
734 ; CHECK-LABEL: @sse2_psll_q_15
735 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
736 ; CHECK-NEXT: ret <2 x i64> %1
737 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Count vector holds 64 in element 0 and 9999 in element 1. 64 equals the 64-bit
; lane width, so it is out of range: the expected output is an all-zero vector.
741 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
742 ; CHECK-LABEL: @sse2_psll_q_64
743 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
744 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
748 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
749 ; CHECK-LABEL: @avx2_psll_w_0
750 ; CHECK-NEXT: ret <16 x i16> %v
751 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; Count vector holds 15 in element 0, 0 in elements 1-3 and 9999 in elements 4-7.
; The expected output is a generic shl by 15, so the 9999 elements do not
; influence the shift amount.
755 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
756 ; CHECK-LABEL: @avx2_psll_w_15
757 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
758 ; CHECK-NEXT: ret <16 x i16> %1
759 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-3 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
763 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
764 ; CHECK-LABEL: @avx2_psll_w_15_splat
765 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
766 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Count vector holds 64 in element 0 (0 in elements 1-3, 9999 in elements 4-7).
; 64 is too large for 16-bit lanes: the expected output is an all-zero vector.
770 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
771 ; CHECK-LABEL: @avx2_psll_w_64
772 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
773 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
777 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
778 ; CHECK-LABEL: @avx2_psll_d_0
779 ; CHECK-NEXT: ret <8 x i32> %v
780 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; Count vector holds 15 in element 0, 0 in element 1 and 9999 in elements 2-3.
; The expected output is a generic shl by 15, so the 9999 elements do not
; influence the shift amount.
784 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
785 ; CHECK-LABEL: @avx2_psll_d_15
786 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
787 ; CHECK-NEXT: ret <8 x i32> %1
788 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count vector is a splat of 15: the expected output is an all-zero vector, not a
; shift by 15. NOTE(review): presumably elements 0-1 are read together as one
; 64-bit count that is out of range - confirm against the intrinsic's definition.
792 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
793 ; CHECK-LABEL: @avx2_psll_d_15_splat
794 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
795 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Count vector holds 64 in element 0 (0 in element 1, 9999 in elements 2-3).
; 64 is too large for 32-bit lanes: the expected output is an all-zero vector.
799 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
800 ; CHECK-LABEL: @avx2_psll_d_64
801 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
802 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count vector: the expected output returns %v unchanged, with the intrinsic call gone.
806 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
807 ; CHECK-LABEL: @avx2_psll_q_0
808 ; CHECK-NEXT: ret <4 x i64> %v
809 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; Count vector holds 15 in element 0 and 9999 in element 1. The expected output
; is a generic shl by 15, so element 1 does not influence the shift amount.
813 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
814 ; CHECK-LABEL: @avx2_psll_q_15
815 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
816 ; CHECK-NEXT: ret <4 x i64> %1
817 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Count vector holds 64 in element 0 and 9999 in element 1. 64 equals the 64-bit
; lane width, so it is out of range: the expected output is an all-zero vector.
821 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
822 ; CHECK-LABEL: @avx2_psll_q_64
823 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
824 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
829 ; Vector Demanded Bits
; Variable count vector %a is first passed through a shufflevector that keeps
; elements 0-3 in place and overwrites elements 4-7 with copies of elements 0-3.
; The expected output calls the intrinsic on %a directly, i.e. the shuffle is
; removed because it only changes elements 4-7 of the count operand.
832 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
833 ; CHECK-LABEL: @sse2_psra_w_var
834 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
835 ; CHECK-NEXT: ret <8 x i16> %1
836 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
837 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; sse2.psra.d with a variable count that first passes through a shufflevector.
; The mask <0,1,0,1> keeps the low two i32 elements (the low 64 bits) of %a in
; place and only overwrites the upper two. The checks expect the shuffle to be
; removed and %a to be passed to the intrinsic directly.
841 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
842 ; CHECK-LABEL: @sse2_psra_d_var
843 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
844 ; CHECK-NEXT: ret <4 x i32> %1
845 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
846 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
; avx2.psra.w (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,2,3,0,1,2,3> leaves the low four i16 count elements untouched;
; the checks expect the shuffle to be removed and %a used directly.
850 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
851 ; CHECK-LABEL: @avx2_psra_w_var
852 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
853 ; CHECK-NEXT: ret <16 x i16> %1
854 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
855 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
; avx2.psra.d (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,0,1> leaves the low two i32 count elements untouched; the
; checks expect the shuffle to be removed and %a used directly.
859 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
860 ; CHECK-LABEL: @avx2_psra_d_var
861 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
862 ; CHECK-NEXT: ret <8 x i32> %1
863 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
864 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
; sse2.psrl.w with a variable count that first passes through a shufflevector.
; The mask <0,1,2,3,0,1,2,3> leaves the low four i16 count elements untouched;
; the checks expect the shuffle to be removed and %a used directly.
868 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
869 ; CHECK-LABEL: @sse2_psrl_w_var
870 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
871 ; CHECK-NEXT: ret <8 x i16> %1
872 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
873 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
; sse2.psrl.d with a variable count that first passes through a shufflevector.
; The mask <0,1,0,1> leaves the low two i32 count elements untouched; the
; checks expect the shuffle to be removed and %a used directly.
877 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
878 ; CHECK-LABEL: @sse2_psrl_d_var
879 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
880 ; CHECK-NEXT: ret <4 x i32> %1
881 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
882 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
; sse2.psrl.q with a variable count that first passes through a shufflevector.
; The mask <0,0> leaves the low i64 count element untouched and only replaces
; the upper one; the checks expect the shuffle to be removed and %a used
; directly.
886 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
887 ; CHECK-LABEL: @sse2_psrl_q_var
888 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
889 ; CHECK-NEXT: ret <2 x i64> %1
890 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
891 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
; avx2.psrl.w (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,2,3,0,1,2,3> leaves the low four i16 count elements untouched;
; the checks expect the shuffle to be removed and %a used directly.
895 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
896 ; CHECK-LABEL: @avx2_psrl_w_var
897 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
898 ; CHECK-NEXT: ret <16 x i16> %1
899 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
900 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; avx2.psrl.d (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,0,1> leaves the low two i32 count elements untouched; the
; checks expect the shuffle to be removed and %a used directly.
904 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
905 ; CHECK-LABEL: @avx2_psrl_d_var
906 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
907 ; CHECK-NEXT: ret <8 x i32> %1
908 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
909 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; avx2.psrl.q (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,0> leaves the low i64 count element untouched; the checks expect
; the shuffle to be removed and %a used directly.
913 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
914 ; CHECK-LABEL: @avx2_psrl_q_var
915 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
916 ; CHECK-NEXT: ret <4 x i64> %1
917 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
918 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
; sse2.psll.w with a variable count that first passes through a shufflevector.
; The mask <0,1,2,3,0,1,2,3> leaves the low four i16 count elements untouched;
; the checks expect the shuffle to be removed and %a used directly.
922 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
923 ; CHECK-LABEL: @sse2_psll_w_var
924 ; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
925 ; CHECK-NEXT: ret <8 x i16> %1
926 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
927 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
; sse2.psll.d with a variable count that first passes through a shufflevector.
; The mask <0,1,0,1> leaves the low two i32 count elements untouched; the
; checks expect the shuffle to be removed and %a used directly.
931 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
932 ; CHECK-LABEL: @sse2_psll_d_var
933 ; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
934 ; CHECK-NEXT: ret <4 x i32> %1
935 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
936 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
; sse2.psll.q with a variable count that first passes through a shufflevector.
; The mask <0,0> leaves the low i64 count element untouched; the checks expect
; the shuffle to be removed and %a used directly.
940 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
941 ; CHECK-LABEL: @sse2_psll_q_var
942 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
943 ; CHECK-NEXT: ret <2 x i64> %1
944 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
945 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
; avx2.psll.w (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,2,3,0,1,2,3> leaves the low four i16 count elements untouched;
; the checks expect the shuffle to be removed and %a used directly.
949 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) nounwind readnone uwtable {
950 ; CHECK-LABEL: @avx2_psll_w_var
951 ; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
952 ; CHECK-NEXT: ret <16 x i16> %1
953 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
954 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
; avx2.psll.d (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,1,0,1> leaves the low two i32 count elements untouched; the
; checks expect the shuffle to be removed and %a used directly.
958 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) nounwind readnone uwtable {
959 ; CHECK-LABEL: @avx2_psll_d_var
960 ; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
961 ; CHECK-NEXT: ret <8 x i32> %1
962 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
963 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
; avx2.psll.q (256-bit data, 128-bit count) with a shuffled variable count.
; The mask <0,0> leaves the low i64 count element untouched; the checks expect
; the shuffle to be removed and %a used directly.
967 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) nounwind readnone uwtable {
968 ; CHECK-LABEL: @avx2_psll_q_var
969 ; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
970 ; CHECK-NEXT: ret <4 x i64> %1
971 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
972 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
; Chain psrai.w(0) -> psra.w(<0,0,0,0,7,0,0,0>) -> psrai.w(0) on %A.
; The low four i16 count elements (the low 64 bits) are zero; the 7 sits in
; element 4. The checks expect the whole chain to fold away and %A to be
; returned unchanged, i.e. the 7 must not be treated as a shift amount.
980 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
981 ; CHECK-LABEL: @test_sse2_psra_w_0
982 ; CHECK-NEXT: ret <8 x i16> %A
983 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
984 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
985 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
; Constant-folding test: arithmetic right shifts of 3 (immediate), 3 (vector
; count, element 0) and 2 (immediate) applied to a constant, 8 bits in total.
; Each i64 input constant is 0x1000200040008000, i.e. the i16 lanes 0x8000,
; 0x4000, 0x2000, 0x1000; shifting each arithmetically right by 8 gives the
; expected <-128, 64, 32, 16> pattern. The 7 in count element 4 lies outside
; the low 64 bits and does not contribute to the expected result.
989 define <8 x i16> @test_sse2_psra_w_8() {
990 ; CHECK-LABEL: @test_sse2_psra_w_8
991 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
992 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
993 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
994 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
995 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
; Chain psrai.d(0) -> psra.d(<0,0,7,0>) -> psrai.d(0) on %A.
; The low two i32 count elements (the low 64 bits) are zero; the 7 sits in
; element 2. The checks expect the whole chain to fold away and %A to be
; returned unchanged, i.e. the 7 must not be treated as a shift amount.
; The final psrai.d consumes %2 so that the psra.d call is actually part of
; the chain being folded (feeding it %1 would leave the psra.d dead and
; untested).
999 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
1000 ; CHECK-LABEL: @test_sse2_psra_d_0
1001 ; CHECK-NEXT: ret <4 x i32> %A
1002 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
1003 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1004 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
; Constant-folding test: arithmetic right shifts of 3 (immediate), 3 (vector
; count, element 0) and 2 (immediate) applied to a constant, 8 bits in total.
; Each i64 input constant is 0x1000200040008000, i.e. the i32 lanes 0x40008000
; and 0x10002000; shifting each right by 8 gives 0x00400080 (4194432) and
; 0x00100020 (1048608). The 7 in count element 2 lies outside the low 64 bits
; and does not contribute to the expected result.
; NOTE(review): unlike its siblings this function has no `test_` prefix; a
; rename would have to update the CHECK-LABEL line as well.
1008 define <4 x i32> @sse2_psra_d_8() {
1009 ; CHECK-LABEL: @sse2_psra_d_8
1010 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1011 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
1012 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
1013 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1014 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
; AVX2 counterpart of test_sse2_psra_w_0: psrai.w(0) -> psra.w(<0,0,0,0,7,0,0,0>)
; -> psrai.w(0) on %A. The low four i16 count elements are zero and the checks
; expect %A to be returned unchanged.
1018 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
1019 ; CHECK-LABEL: @test_avx2_psra_w_0
1020 ; CHECK-NEXT: ret <16 x i16> %A
1021 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
1022 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1023 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
; AVX2 counterpart of test_sse2_psra_w_8: arithmetic right shifts of 3, 3 and 2
; (8 bits in total) applied to a constant whose i16 lanes repeat 0x8000,
; 0x4000, 0x2000, 0x1000; the expected folded result repeats
; <-128, 64, 32, 16>.
; NOTE(review): the %A parameter is not referenced by any instruction shown
; here; the input is the constant bitcast in %1.
1027 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
1028 ; CHECK-LABEL: @test_avx2_psra_w_8
1029 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
1030 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
1031 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
1032 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
1033 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
; AVX2 counterpart of test_sse2_psra_d_0: psrai.d(0) -> psra.d(<0,0,7,0>) ->
; psrai.d(0) on %A. The low two i32 count elements are zero and the checks
; expect %A to be returned unchanged.
1037 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
1038 ; CHECK-LABEL: @test_avx2_psra_d_0
1039 ; CHECK-NEXT: ret <8 x i32> %A
1040 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
1041 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
1042 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
; AVX2 counterpart of sse2_psra_d_8: arithmetic right shifts of 3, 3 and 2
; (8 bits in total) applied to a constant whose i32 lanes alternate 0x40008000
; and 0x10002000; the expected folded result alternates 4194432 (0x00400080)
; and 1048608 (0x00100020).
1046 define <8 x i32> @test_avx2_psra_d_8() {
1047 ; CHECK-LABEL: @test_avx2_psra_d_8
1048 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
1049 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
1050 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
1051 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
1052 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
; Chains the SSE2 left shifts with a vector count (psll.w/d/q) and with an
; immediate count (pslli.w/d/q) over the constant <1,2,...,8>, each shifting by
; the count 1 held in %S. %S is a no-op bitcast that merely names the constant;
; %3 is the count vector <zext(%S), 0>. Six shifts by 1 multiply every i16 lane
; by 64 without overflow, and the whole chain is expected to constant-fold
; into a single `ret`.
1056 define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
1057 %S = bitcast i32 1 to i32
1058 %1 = zext i32 %S to i64
1059 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1060 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1061 %4 = bitcast <2 x i64> %3 to <8 x i16>
1062 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1063 %6 = bitcast <8 x i16> %5 to <4 x i32>
1064 %7 = bitcast <2 x i64> %3 to <4 x i32>
1065 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1066 %9 = bitcast <4 x i32> %8 to <2 x i64>
1067 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1068 %11 = bitcast <2 x i64> %10 to <8 x i16>
1069 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1070 %13 = bitcast <8 x i16> %12 to <4 x i32>
1071 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1072 %15 = bitcast <4 x i32> %14 to <2 x i64>
1073 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1075 ; CHECK-LABEL: @test_sse2_1
1076 ; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
; AVX2 version of the left-shift chain: psll.w/d/q (vector count) followed by
; pslli.w/d/q (immediate count), each shifting by the count 1 held in %S.
; The input holds 1, 2, 3, 4 in the low i16 of each i64 lane, so six shifts by
; 1 yield <64, 128, 192, 256>; the whole chain is expected to constant-fold
; into a single `ret`.
1079 define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
1080 %S = bitcast i32 1 to i32
1081 %1 = zext i32 %S to i64
1082 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1083 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1084 %4 = bitcast <2 x i64> %3 to <8 x i16>
1085 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1086 %6 = bitcast <16 x i16> %5 to <8 x i32>
1087 %7 = bitcast <2 x i64> %3 to <4 x i32>
1088 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1089 %9 = bitcast <8 x i32> %8 to <4 x i64>
1090 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1091 %11 = bitcast <4 x i64> %10 to <16 x i16>
1092 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1093 %13 = bitcast <16 x i16> %12 to <8 x i32>
1094 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1095 %15 = bitcast <8 x i32> %14 to <4 x i64>
1096 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1098 ; CHECK-LABEL: @test_avx2_1
1099 ; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
; Same SSE2 left-shift chain as test_sse2_1 but with the count 128 in %S, which
; exceeds every element width used (16, 32 and 64 bits). The whole chain is
; expected to constant-fold to an all-zero vector.
1102 define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
1103 %S = bitcast i32 128 to i32
1104 %1 = zext i32 %S to i64
1105 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1106 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1107 %4 = bitcast <2 x i64> %3 to <8 x i16>
1108 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
1109 %6 = bitcast <8 x i16> %5 to <4 x i32>
1110 %7 = bitcast <2 x i64> %3 to <4 x i32>
1111 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
1112 %9 = bitcast <4 x i32> %8 to <2 x i64>
1113 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
1114 %11 = bitcast <2 x i64> %10 to <8 x i16>
1115 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
1116 %13 = bitcast <8 x i16> %12 to <4 x i32>
1117 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
1118 %15 = bitcast <4 x i32> %14 to <2 x i64>
1119 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
1121 ; CHECK-LABEL: @test_sse2_0
1122 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
; Same AVX2 left-shift chain as test_avx2_1 but with the count 128 in %S, which
; exceeds every element width used (16, 32 and 64 bits). The whole chain is
; expected to constant-fold to an all-zero vector.
1125 define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
1126 %S = bitcast i32 128 to i32
1127 %1 = zext i32 %S to i64
1128 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1129 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1130 %4 = bitcast <2 x i64> %3 to <8 x i16>
1131 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
1132 %6 = bitcast <16 x i16> %5 to <8 x i32>
1133 %7 = bitcast <2 x i64> %3 to <4 x i32>
1134 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
1135 %9 = bitcast <8 x i32> %8 to <4 x i64>
1136 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
1137 %11 = bitcast <4 x i64> %10 to <16 x i16>
1138 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
1139 %13 = bitcast <16 x i16> %12 to <8 x i32>
1140 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
1141 %15 = bitcast <8 x i32> %14 to <4 x i64>
1142 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
1144 ; CHECK-LABEL: @test_avx2_0
1145 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
; Chains the SSE2 right shifts with a vector count (psrl.w/d/q) and with an
; immediate count (psrli.w/d/q) over the constant <16,32,...,2048>, each
; shifting by the count 1 held in %S. %S is a no-op bitcast that merely names
; the constant; %3 is the count vector <zext(%S), 0>. The whole chain is
; expected to constant-fold into a single `ret`.
1147 define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
1148 %S = bitcast i32 1 to i32
1149 %1 = zext i32 %S to i64
1150 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1151 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1152 %4 = bitcast <2 x i64> %3 to <8 x i16>
1153 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
1154 %6 = bitcast <8 x i16> %5 to <4 x i32>
1155 %7 = bitcast <2 x i64> %3 to <4 x i32>
1156 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1157 %9 = bitcast <4 x i32> %8 to <2 x i64>
1158 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1159 %11 = bitcast <2 x i64> %10 to <8 x i16>
1160 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1161 %13 = bitcast <8 x i16> %12 to <4 x i32>
1162 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1163 %15 = bitcast <4 x i32> %14 to <2 x i64>
1164 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1166 ; CHECK-LABEL: @test_sse2_psrl_1
1167 ; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
; AVX2 version of the right-shift chain: psrl.w/d/q (vector count) followed by
; psrli.w/d/q (immediate count), each shifting by the count 1 held in %S.
; The input holds 1024, 2048, 4096, 8192 in the low i16 of each i64 lane, so
; six shifts by 1 yield <16, 32, 64, 128>; the whole chain is expected to
; constant-fold into a single `ret`.
1170 define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
1171 %S = bitcast i32 1 to i32
1172 %1 = zext i32 %S to i64
1173 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1174 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1175 %4 = bitcast <2 x i64> %3 to <8 x i16>
1176 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1177 %6 = bitcast <16 x i16> %5 to <8 x i32>
1178 %7 = bitcast <2 x i64> %3 to <4 x i32>
1179 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1180 %9 = bitcast <8 x i32> %8 to <4 x i64>
1181 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1182 %11 = bitcast <4 x i64> %10 to <16 x i16>
1183 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1184 %13 = bitcast <16 x i16> %12 to <8 x i32>
1185 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1186 %15 = bitcast <8 x i32> %14 to <4 x i64>
1187 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1189 ; CHECK-LABEL: @test_avx2_psrl_1
1190 ; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
; Same SSE2 right-shift chain as test_sse2_psrl_1 but with the count 128 in %S,
; which exceeds every element width used (16, 32 and 64 bits). The whole chain
; is expected to constant-fold to an all-zero vector.
1193 define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
1194 %S = bitcast i32 128 to i32
1195 %1 = zext i32 %S to i64
1196 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1197 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1198 %4 = bitcast <2 x i64> %3 to <8 x i16>
1199 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
1200 %6 = bitcast <8 x i16> %5 to <4 x i32>
1201 %7 = bitcast <2 x i64> %3 to <4 x i32>
1202 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
1203 %9 = bitcast <4 x i32> %8 to <2 x i64>
1204 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
1205 %11 = bitcast <2 x i64> %10 to <8 x i16>
1206 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
1207 %13 = bitcast <8 x i16> %12 to <4 x i32>
1208 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
1209 %15 = bitcast <4 x i32> %14 to <2 x i64>
1210 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
1212 ; CHECK-LABEL: @test_sse2_psrl_0
1213 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
; Same AVX2 right-shift chain as test_avx2_psrl_1 but with the count 128 in %S,
; which exceeds every element width used (16, 32 and 64 bits). The whole chain
; is expected to constant-fold to an all-zero vector.
1216 define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
1217 %S = bitcast i32 128 to i32
1218 %1 = zext i32 %S to i64
1219 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1220 %3 = insertelement <2 x i64> %2, i64 0, i32 1
1221 %4 = bitcast <2 x i64> %3 to <8 x i16>
1222 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
1223 %6 = bitcast <16 x i16> %5 to <8 x i32>
1224 %7 = bitcast <2 x i64> %3 to <4 x i32>
1225 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
1226 %9 = bitcast <8 x i32> %8 to <4 x i64>
1227 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
1228 %11 = bitcast <4 x i64> %10 to <16 x i16>
1229 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
1230 %13 = bitcast <16 x i16> %12 to <8 x i32>
1231 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
1232 %15 = bitcast <8 x i32> %14 to <4 x i64>
1233 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
1235 ; CHECK-LABEL: @test_avx2_psrl_0
1236 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
; Declarations of the x86 shift intrinsics called by the tests in this file.
; The `*i` variants take the count as an i32; the others take it as a 128-bit
; vector. Every declaration uses attribute group #1.

; psll / pslli family (AVX2, then SSE2).
1239 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
1240 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
1241 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
1242 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
1243 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
1244 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
1245 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
1246 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
1247 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
1248 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
1249 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
1250 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
; psrl / psrli family (AVX2, then SSE2).
1252 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
1253 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
1254 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
1255 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
1256 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
1257 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
1258 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
1259 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
1260 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
1261 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
1262 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
1263 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
; psra / psrai family (AVX2, then SSE2); only .w and .d variants are declared.
1265 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
1266 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
1267 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
1268 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
1269 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
1270 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
1271 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
1272 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
; Attribute group referenced as #1 by every declaration above.
1274 attributes #1 = { nounwind readnone }