1 ; RUN: opt < %s -instcombine -S | FileCheck %s
\r
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
\r
8 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
9 ; CHECK-LABEL: @sse2_psrai_w_0
\r
10 ; CHECK-NEXT: ret <8 x i16> %v
\r
11 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
\r
15 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
16 ; CHECK-LABEL: @sse2_psrai_w_15
\r
17 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
18 ; CHECK-NEXT: ret <8 x i16> %1
\r
19 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
\r
23 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
24 ; CHECK-LABEL: @sse2_psrai_w_64
\r
25 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
26 ; CHECK-NEXT: ret <8 x i16> %1
\r
27 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
\r
31 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
32 ; CHECK-LABEL: @sse2_psrai_d_0
\r
33 ; CHECK-NEXT: ret <4 x i32> %v
\r
34 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
\r
38 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
39 ; CHECK-LABEL: @sse2_psrai_d_15
\r
40 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
41 ; CHECK-NEXT: ret <4 x i32> %1
\r
42 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
\r
46 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
47 ; CHECK-LABEL: @sse2_psrai_d_64
\r
48 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
49 ; CHECK-NEXT: ret <4 x i32> %1
\r
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
\r
54 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
55 ; CHECK-LABEL: @avx2_psrai_w_0
\r
56 ; CHECK-NEXT: ret <16 x i16> %v
\r
57 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
\r
61 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
62 ; CHECK-LABEL: @avx2_psrai_w_15
\r
63 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
64 ; CHECK-NEXT: ret <16 x i16> %1
\r
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
\r
69 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
70 ; CHECK-LABEL: @avx2_psrai_w_64
\r
71 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
72 ; CHECK-NEXT: ret <16 x i16> %1
\r
73 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
\r
77 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
78 ; CHECK-LABEL: @avx2_psrai_d_0
\r
79 ; CHECK-NEXT: ret <8 x i32> %v
\r
80 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
\r
84 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
85 ; CHECK-LABEL: @avx2_psrai_d_15
\r
86 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
87 ; CHECK-NEXT: ret <8 x i32> %1
\r
88 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
\r
92 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
93 ; CHECK-LABEL: @avx2_psrai_d_64
\r
94 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
95 ; CHECK-NEXT: ret <8 x i32> %1
\r
96 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
\r
104 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
105 ; CHECK-LABEL: @sse2_psrli_w_0
\r
106 ; CHECK-NEXT: ret <8 x i16> %v
\r
107 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
\r
111 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
112 ; CHECK-LABEL: @sse2_psrli_w_15
\r
113 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
114 ; CHECK-NEXT: ret <8 x i16> %1
\r
115 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
\r
119 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
120 ; CHECK-LABEL: @sse2_psrli_w_64
\r
121 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
122 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
\r
126 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
127 ; CHECK-LABEL: @sse2_psrli_d_0
\r
128 ; CHECK-NEXT: ret <4 x i32> %v
\r
129 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
\r
133 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
134 ; CHECK-LABEL: @sse2_psrli_d_15
\r
135 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
136 ; CHECK-NEXT: ret <4 x i32> %1
\r
137 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
\r
141 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
142 ; CHECK-LABEL: @sse2_psrli_d_64
\r
143 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
144 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
\r
148 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
149 ; CHECK-LABEL: @sse2_psrli_q_0
\r
150 ; CHECK-NEXT: ret <2 x i64> %v
\r
151 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
\r
155 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
156 ; CHECK-LABEL: @sse2_psrli_q_15
\r
157 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
\r
158 ; CHECK-NEXT: ret <2 x i64> %1
\r
159 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
\r
163 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
164 ; CHECK-LABEL: @sse2_psrli_q_64
\r
165 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
166 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
\r
170 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
171 ; CHECK-LABEL: @avx2_psrli_w_0
\r
172 ; CHECK-NEXT: ret <16 x i16> %v
\r
173 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
\r
177 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
178 ; CHECK-LABEL: @avx2_psrli_w_15
\r
179 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
180 ; CHECK-NEXT: ret <16 x i16> %1
\r
181 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
\r
185 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
186 ; CHECK-LABEL: @avx2_psrli_w_64
\r
187 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
188 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
\r
192 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
193 ; CHECK-LABEL: @avx2_psrli_d_0
\r
194 ; CHECK-NEXT: ret <8 x i32> %v
\r
195 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
\r
199 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
200 ; CHECK-LABEL: @avx2_psrli_d_15
\r
201 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
202 ; CHECK-NEXT: ret <8 x i32> %1
\r
203 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
\r
207 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
208 ; CHECK-LABEL: @avx2_psrli_d_64
\r
209 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
210 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
\r
214 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
215 ; CHECK-LABEL: @avx2_psrli_q_0
\r
216 ; CHECK-NEXT: ret <4 x i64> %v
\r
217 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
\r
221 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
222 ; CHECK-LABEL: @avx2_psrli_q_15
\r
223 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
224 ; CHECK-NEXT: ret <4 x i64> %1
\r
225 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
\r
229 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
230 ; CHECK-LABEL: @avx2_psrli_q_64
\r
231 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
232 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
\r
240 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
241 ; CHECK-LABEL: @sse2_pslli_w_0
\r
242 ; CHECK-NEXT: ret <8 x i16> %v
\r
243 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
\r
247 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
248 ; CHECK-LABEL: @sse2_pslli_w_15
\r
249 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
250 ; CHECK-NEXT: ret <8 x i16> %1
\r
251 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
\r
255 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
256 ; CHECK-LABEL: @sse2_pslli_w_64
\r
257 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
258 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
\r
262 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
263 ; CHECK-LABEL: @sse2_pslli_d_0
\r
264 ; CHECK-NEXT: ret <4 x i32> %v
\r
265 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
\r
269 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
270 ; CHECK-LABEL: @sse2_pslli_d_15
\r
271 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
272 ; CHECK-NEXT: ret <4 x i32> %1
\r
273 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
\r
277 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
278 ; CHECK-LABEL: @sse2_pslli_d_64
\r
279 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
280 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
\r
284 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
285 ; CHECK-LABEL: @sse2_pslli_q_0
\r
286 ; CHECK-NEXT: ret <2 x i64> %v
\r
287 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
\r
291 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
292 ; CHECK-LABEL: @sse2_pslli_q_15
\r
293 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
\r
294 ; CHECK-NEXT: ret <2 x i64> %1
\r
295 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
\r
299 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
300 ; CHECK-LABEL: @sse2_pslli_q_64
\r
301 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
302 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
\r
306 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
307 ; CHECK-LABEL: @avx2_pslli_w_0
\r
308 ; CHECK-NEXT: ret <16 x i16> %v
\r
309 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
\r
313 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
314 ; CHECK-LABEL: @avx2_pslli_w_15
\r
315 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
316 ; CHECK-NEXT: ret <16 x i16> %1
\r
317 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
\r
321 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
322 ; CHECK-LABEL: @avx2_pslli_w_64
\r
323 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
324 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
\r
328 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
329 ; CHECK-LABEL: @avx2_pslli_d_0
\r
330 ; CHECK-NEXT: ret <8 x i32> %v
\r
331 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
\r
335 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
336 ; CHECK-LABEL: @avx2_pslli_d_15
\r
337 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
338 ; CHECK-NEXT: ret <8 x i32> %1
\r
339 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
\r
343 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
344 ; CHECK-LABEL: @avx2_pslli_d_64
\r
345 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
346 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
\r
350 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
351 ; CHECK-LABEL: @avx2_pslli_q_0
\r
352 ; CHECK-NEXT: ret <4 x i64> %v
\r
353 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
\r
357 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
358 ; CHECK-LABEL: @avx2_pslli_q_15
\r
359 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
360 ; CHECK-NEXT: ret <4 x i64> %1
\r
361 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
\r
365 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
366 ; CHECK-LABEL: @avx2_pslli_q_64
\r
367 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
368 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
\r
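373 ; ASHR - Constant Vector (count = low 64 bits of the second operand, upper lanes ignored; a count above element width - 1 clamps to element width - 1)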
\r
376 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
377 ; CHECK-LABEL: @sse2_psra_w_0
\r
378 ; CHECK-NEXT: ret <8 x i16> %v
\r
379 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
383 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
384 ; CHECK-LABEL: @sse2_psra_w_15
\r
385 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
386 ; CHECK-NEXT: ret <8 x i16> %1
\r
387 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
391 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
392 ; CHECK-LABEL: @sse2_psra_w_15_splat
\r
393 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
394 ; CHECK-NEXT: ret <8 x i16> %1
\r
395 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
399 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
400 ; CHECK-LABEL: @sse2_psra_w_64
\r
401 ; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
402 ; CHECK-NEXT: ret <8 x i16> %1
\r
403 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
407 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
408 ; CHECK-LABEL: @sse2_psra_d_0
\r
409 ; CHECK-NEXT: ret <4 x i32> %v
\r
410 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
414 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
415 ; CHECK-LABEL: @sse2_psra_d_15
\r
416 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
417 ; CHECK-NEXT: ret <4 x i32> %1
\r
418 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
422 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
423 ; CHECK-LABEL: @sse2_psra_d_15_splat
\r
424 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
425 ; CHECK-NEXT: ret <4 x i32> %1
\r
426 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
430 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
431 ; CHECK-LABEL: @sse2_psra_d_64
\r
432 ; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
\r
433 ; CHECK-NEXT: ret <4 x i32> %1
\r
434 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
438 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
439 ; CHECK-LABEL: @avx2_psra_w_0
\r
440 ; CHECK-NEXT: ret <16 x i16> %v
\r
441 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
445 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
446 ; CHECK-LABEL: @avx2_psra_w_15
\r
447 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
448 ; CHECK-NEXT: ret <16 x i16> %1
\r
449 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
453 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
454 ; CHECK-LABEL: @avx2_psra_w_15_splat
\r
455 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
456 ; CHECK-NEXT: ret <16 x i16> %1
\r
457 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
461 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
462 ; CHECK-LABEL: @avx2_psra_w_64
\r
463 ; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
464 ; CHECK-NEXT: ret <16 x i16> %1
\r
465 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
469 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
470 ; CHECK-LABEL: @avx2_psra_d_0
\r
471 ; CHECK-NEXT: ret <8 x i32> %v
\r
472 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
476 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
477 ; CHECK-LABEL: @avx2_psra_d_15
\r
478 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
479 ; CHECK-NEXT: ret <8 x i32> %1
\r
480 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
484 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
485 ; CHECK-LABEL: @avx2_psra_d_15_splat
\r
486 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
487 ; CHECK-NEXT: ret <8 x i32> %1
\r
488 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
492 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
493 ; CHECK-LABEL: @avx2_psra_d_64
\r
494 ; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
\r
495 ; CHECK-NEXT: ret <8 x i32> %1
\r
496 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
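501 ; LSHR - Constant Vector (count = low 64 bits of the second operand, upper lanes ignored; a count >= element width folds to zeroinitializer)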
\r
504 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
505 ; CHECK-LABEL: @sse2_psrl_w_0
\r
506 ; CHECK-NEXT: ret <8 x i16> %v
\r
507 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
511 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
512 ; CHECK-LABEL: @sse2_psrl_w_15
\r
513 ; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
514 ; CHECK-NEXT: ret <8 x i16> %1
\r
515 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
519 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
520 ; CHECK-LABEL: @sse2_psrl_w_15_splat
\r
521 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
522 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
526 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
527 ; CHECK-LABEL: @sse2_psrl_w_64
\r
528 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
529 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
533 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
534 ; CHECK-LABEL: @sse2_psrl_d_0
\r
535 ; CHECK-NEXT: ret <4 x i32> %v
\r
536 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
540 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
541 ; CHECK-LABEL: @sse2_psrl_d_15
\r
542 ; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
543 ; CHECK-NEXT: ret <4 x i32> %1
\r
544 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
548 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
549 ; CHECK-LABEL: @sse2_psrl_d_15_splat
\r
550 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
551 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
555 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
556 ; CHECK-LABEL: @sse2_psrl_d_64
\r
557 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
558 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
562 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
563 ; CHECK-LABEL: @sse2_psrl_q_0
\r
564 ; CHECK-NEXT: ret <2 x i64> %v
\r
565 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
\r
569 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
570 ; CHECK-LABEL: @sse2_psrl_q_15
\r
571 ; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
\r
572 ; CHECK-NEXT: ret <2 x i64> %1
\r
573 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
577 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
578 ; CHECK-LABEL: @sse2_psrl_q_64
\r
579 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
580 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
584 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
585 ; CHECK-LABEL: @avx2_psrl_w_0
\r
586 ; CHECK-NEXT: ret <16 x i16> %v
\r
587 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
591 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
592 ; CHECK-LABEL: @avx2_psrl_w_15
\r
593 ; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
594 ; CHECK-NEXT: ret <16 x i16> %1
\r
595 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
599 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
600 ; CHECK-LABEL: @avx2_psrl_w_15_splat
\r
601 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
602 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
606 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
607 ; CHECK-LABEL: @avx2_psrl_w_64
\r
608 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
609 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
613 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
614 ; CHECK-LABEL: @avx2_psrl_d_0
\r
615 ; CHECK-NEXT: ret <8 x i32> %v
\r
616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
620 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
621 ; CHECK-LABEL: @avx2_psrl_d_15
\r
622 ; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
623 ; CHECK-NEXT: ret <8 x i32> %1
\r
624 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
628 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
629 ; CHECK-LABEL: @avx2_psrl_d_15_splat
\r
630 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
631 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
635 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
636 ; CHECK-LABEL: @avx2_psrl_d_64
\r
637 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
638 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
642 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
643 ; CHECK-LABEL: @avx2_psrl_q_0
\r
644 ; CHECK-NEXT: ret <4 x i64> %v
\r
645 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
\r
649 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
650 ; CHECK-LABEL: @avx2_psrl_q_15
\r
651 ; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
652 ; CHECK-NEXT: ret <4 x i64> %1
\r
653 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
657 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
658 ; CHECK-LABEL: @avx2_psrl_q_64
\r
659 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
660 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
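665 ; SHL - Constant Vector (count = low 64 bits of the second operand, upper lanes ignored; a count >= element width folds to zeroinitializer)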
\r
668 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
\r
669 ; CHECK-LABEL: @sse2_psll_w_0
\r
670 ; CHECK-NEXT: ret <8 x i16> %v
\r
671 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
\r
675 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
\r
676 ; CHECK-LABEL: @sse2_psll_w_15
\r
677 ; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
678 ; CHECK-NEXT: ret <8 x i16> %1
\r
679 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
683 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
\r
684 ; CHECK-LABEL: @sse2_psll_w_15_splat
\r
685 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
686 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
690 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
\r
691 ; CHECK-LABEL: @sse2_psll_w_64
\r
692 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
\r
693 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
697 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
\r
698 ; CHECK-LABEL: @sse2_psll_d_0
\r
699 ; CHECK-NEXT: ret <4 x i32> %v
\r
700 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
\r
704 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
\r
705 ; CHECK-LABEL: @sse2_psll_d_15
\r
706 ; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
\r
707 ; CHECK-NEXT: ret <4 x i32> %1
\r
708 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
712 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
\r
713 ; CHECK-LABEL: @sse2_psll_d_15_splat
\r
714 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
715 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
719 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
\r
720 ; CHECK-LABEL: @sse2_psll_d_64
\r
721 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
\r
722 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
726 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
\r
727 ; CHECK-LABEL: @sse2_psll_q_0
\r
728 ; CHECK-NEXT: ret <2 x i64> %v
\r
729 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
\r
733 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
\r
734 ; CHECK-LABEL: @sse2_psll_q_15
\r
735 ; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
\r
736 ; CHECK-NEXT: ret <2 x i64> %1
\r
737 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
741 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
\r
742 ; CHECK-LABEL: @sse2_psll_q_64
\r
743 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
\r
744 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
748 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
\r
749 ; CHECK-LABEL: @avx2_psll_w_0
\r
750 ; CHECK-NEXT: ret <16 x i16> %v
\r
751 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
\r
755 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
\r
756 ; CHECK-LABEL: @avx2_psll_w_15
\r
757 ; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
\r
758 ; CHECK-NEXT: ret <16 x i16> %1
\r
759 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
763 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
\r
764 ; CHECK-LABEL: @avx2_psll_w_15_splat
\r
765 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
766 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
\r
770 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
\r
771 ; CHECK-LABEL: @avx2_psll_w_64
\r
772 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
\r
773 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
\r
777 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
\r
778 ; CHECK-LABEL: @avx2_psll_d_0
\r
779 ; CHECK-NEXT: ret <8 x i32> %v
\r
780 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
\r
784 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
\r
785 ; CHECK-LABEL: @avx2_psll_d_15
\r
786 ; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
\r
787 ; CHECK-NEXT: ret <8 x i32> %1
\r
788 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
\r
; llvm.x86.avx2.psll.d where every count element is 15. Unlike
; @avx2_psll_d_15 (element 1 is 0), the CHECK line expects zeroinitializer.
; NOTE(review): presumably elements 0 and 1 together form the 64-bit count, so
; a non-zero element 1 pushes it out of range - confirm against the PSLLD
; definition.
792 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
\r
793 ; CHECK-LABEL: @avx2_psll_d_15_splat
\r
794 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
795 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
\r
; llvm.x86.avx2.psll.d with count vector <64, 0, 9999, 9999>. 64 exceeds the
; 32-bit element width, and the CHECK line expects zeroinitializer.
799 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
\r
800 ; CHECK-LABEL: @avx2_psll_d_64
\r
801 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
\r
802 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
\r
; llvm.x86.avx2.psll.q with an all-zero count vector. The CHECK line expects
; the call to disappear and %v to be returned unchanged.
806 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
\r
807 ; CHECK-LABEL: @avx2_psll_q_0
\r
808 ; CHECK-NEXT: ret <4 x i64> %v
\r
809 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
\r
; llvm.x86.avx2.psll.q with count vector <15, 9999>. The CHECK lines expect a
; generic `shl` by a splat of 15, so the 9999 in element 1 must not influence
; the result.
813 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
\r
814 ; CHECK-LABEL: @avx2_psll_q_15
\r
815 ; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
\r
816 ; CHECK-NEXT: ret <4 x i64> %1
\r
817 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
\r
; llvm.x86.avx2.psll.q with count vector <64, 9999>. A count of 64 equals the
; element width, and the CHECK line expects zeroinitializer.
821 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
\r
822 ; CHECK-LABEL: @avx2_psll_q_64
\r
823 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
\r
824 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
\r
; Zero-shift chain for the 128-bit arithmetic word shifts: psrai.w by 0, then
; psra.w with a count vector whose elements 0-3 are 0 (element 4 is a non-zero
; 7), then psrai.w by 0 again. Each call feeds the next, and the CHECK lines
; expect the whole chain to fold away to `ret %A`.
832 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
\r
833 ; CHECK-LABEL: @test_sse2_psra_w_0
\r
834 ; CHECK-NEXT: ret <8 x i16> %A
\r
835 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
\r
836 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
837 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
\r
; Constant-folding chain for the 128-bit arithmetic word shifts. The input is
; a constant <2 x i64> reinterpreted as <8 x i16>; it is shifted by immediate
; 3, by a count vector with 3 in element 0 (and a stray 7 in element 4), and
; by immediate 2. The CHECK line expects the chain to fold to a single
; constant vector.
; NOTE(review): the expected values look like each i16 lane shifted right
; arithmetically by 3 + 3 + 2 = 8 bits (hence the `_8` suffix) - confirm.
841 define <8 x i16> @test_sse2_psra_w_8() {
\r
842 ; CHECK-LABEL: @test_sse2_psra_w_8
\r
843 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
\r
844 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
\r
845 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
\r
846 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
847 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
\r
; Zero-shift chain for the 128-bit arithmetic dword shifts: psrai.d by 0, then
; psra.d with a count vector whose elements 0-1 are 0 (element 2 is a non-zero
; 7), then psrai.d by 0 again. The CHECK lines expect the whole chain to fold
; away to `ret %A`.
; The final psrai.d consumes %2 (the psra.d result) so that the psra.d call is
; part of the checked value instead of being dead code; this matches the
; chaining used by the other *_psra_*_0 tests in this file.
851 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
\r
852 ; CHECK-LABEL: @test_sse2_psra_d_0
\r
853 ; CHECK-NEXT: ret <4 x i32> %A
\r
854 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
\r
855 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
\r
856 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
\r
; Constant-folding chain for the 128-bit arithmetic dword shifts. The input is
; a constant <2 x i64> reinterpreted as <4 x i32>; it is shifted by immediate
; 3, by a count vector <3, 0, 7, 0>, and by immediate 2. The CHECK line
; expects the chain to fold to a single constant vector.
; NOTE(review): the expected values look like each i32 lane shifted right by
; 3 + 3 + 2 = 8 bits - confirm. Also note this function lacks the `test_`
; prefix used by the neighbouring psra tests.
860 define <4 x i32> @sse2_psra_d_8() {
\r
861 ; CHECK-LABEL: @sse2_psra_d_8
\r
862 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
\r
863 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
\r
864 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
\r
865 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
\r
866 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
\r
; Zero-shift chain for the 256-bit arithmetic word shifts: psrai.w by 0, then
; psra.w with a count vector whose elements 0-3 are 0 (element 4 is a non-zero
; 7), then psrai.w by 0 again. Each call feeds the next, and the CHECK lines
; expect the whole chain to fold away to `ret %A`.
870 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
\r
871 ; CHECK-LABEL: @test_avx2_psra_w_0
\r
872 ; CHECK-NEXT: ret <16 x i16> %A
\r
873 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
\r
874 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
875 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
\r
; Constant-folding chain for the 256-bit arithmetic word shifts. The input is
; a constant <4 x i64> reinterpreted as <16 x i16>; it is shifted by immediate
; 3, by a count vector with 3 in element 0 (and a stray 7 in element 4), and
; by immediate 2. The CHECK line expects the chain to fold to a single
; constant vector.
; The %A parameter is not referenced by any instruction visible here.
; NOTE(review): the expected values look like each i16 lane shifted right
; arithmetically by 3 + 3 + 2 = 8 bits - confirm.
879 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
\r
880 ; CHECK-LABEL: @test_avx2_psra_w_8
\r
881 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
\r
882 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
\r
883 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
\r
884 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
\r
885 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
\r
; Zero-shift chain for the 256-bit arithmetic dword shifts: psrai.d by 0, then
; psra.d with a count vector whose elements 0-1 are 0 (element 2 is a non-zero
; 7), then psrai.d by 0 again. Each call feeds the next, and the CHECK lines
; expect the whole chain to fold away to `ret %A`.
889 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
\r
890 ; CHECK-LABEL: @test_avx2_psra_d_0
\r
891 ; CHECK-NEXT: ret <8 x i32> %A
\r
892 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
\r
893 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
\r
894 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
\r
; Constant-folding chain for the 256-bit arithmetic dword shifts. The input is
; a constant <4 x i64> reinterpreted as <8 x i32>; it is shifted by immediate
; 3, by a count vector <3, 0, 7, 0>, and by immediate 2. The CHECK line
; expects the chain to fold to a single constant vector.
; NOTE(review): the expected values look like each i32 lane shifted right by
; 3 + 3 + 2 = 8 bits - confirm.
898 define <8 x i32> @test_avx2_psra_d_8() {
\r
899 ; CHECK-LABEL: @test_avx2_psra_d_8
\r
900 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
\r
901 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
\r
902 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
\r
903 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
\r
904 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
\r
; Constant-folding chain over all six 128-bit left-shift intrinsics with a
; shift amount of 1. %S names the constant 1 via a no-op bitcast; %3 is the
; count vector <zext(%S), 0> reused (through bitcasts) by psll.w, psll.d and
; psll.q, and %S is then passed directly as the immediate to pslli.w, pslli.d
; and pslli.q. The CHECK lines (placed after the body) expect the whole chain
; to fold to one constant <2 x i64>.
908 define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
\r
909 %S = bitcast i32 1 to i32
\r
910 %1 = zext i32 %S to i64
\r
911 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
912 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
913 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
914 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
\r
915 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
916 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
917 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
\r
918 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
919 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
\r
920 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
921 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
\r
922 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
923 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
\r
924 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
925 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
\r
927 ; CHECK: test_sse2_1
\r
928 ; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
\r
; Constant-folding chain over all six 256-bit left-shift intrinsics with a
; shift amount of 1. %S names the constant 1 via a no-op bitcast; %3 is the
; 128-bit count vector <zext(%S), 0> reused (through bitcasts) by psll.w,
; psll.d and psll.q, and %S is then passed directly as the immediate to
; pslli.w, pslli.d and pslli.q. The CHECK lines (placed after the body) expect
; the whole chain to fold to one constant <4 x i64>.
931 define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
\r
932 %S = bitcast i32 1 to i32
\r
933 %1 = zext i32 %S to i64
\r
934 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
935 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
936 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
937 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
938 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
939 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
940 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
\r
941 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
942 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
\r
943 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
944 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
\r
945 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
946 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
\r
947 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
948 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
\r
950 ; CHECK: test_avx2_1
\r
951 ; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
\r
; Same six-intrinsic 128-bit left-shift chain as @test_sse2_1, but %S is 128,
; which is larger than every element width used (16, 32 and 64 bits). The
; CHECK lines (placed after the body) expect the result to fold to
; zeroinitializer.
954 define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
\r
955 %S = bitcast i32 128 to i32
\r
956 %1 = zext i32 %S to i64
\r
957 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
958 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
959 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
960 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
\r
961 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
962 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
963 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
\r
964 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
965 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
\r
966 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
967 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
\r
968 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
969 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
\r
970 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
971 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
\r
973 ; CHECK: test_sse2_0
\r
974 ; CHECK: ret <2 x i64> zeroinitializer
\r
; Same six-intrinsic 256-bit left-shift chain as @test_avx2_1, but %S is 128,
; which is larger than every element width used (16, 32 and 64 bits). The
; CHECK lines (placed after the body) expect the result to fold to
; zeroinitializer.
977 define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
\r
978 %S = bitcast i32 128 to i32
\r
979 %1 = zext i32 %S to i64
\r
980 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
981 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
982 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
983 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
984 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
985 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
986 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
\r
987 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
988 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
\r
989 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
990 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
\r
991 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
992 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
\r
993 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
994 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
\r
996 ; CHECK: test_avx2_0
\r
997 ; CHECK: ret <4 x i64> zeroinitializer
\r
; Constant-folding chain over all six 128-bit logical right-shift intrinsics
; with a shift amount of 1. %S names the constant 1 via a no-op bitcast; %3 is
; the count vector <zext(%S), 0> reused (through bitcasts) by psrl.w, psrl.d
; and psrl.q, and %S is then passed directly as the immediate to psrli.w,
; psrli.d and psrli.q. The CHECK lines (placed after the body) expect the
; whole chain to fold to one constant <2 x i64>.
999 define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
\r
1000 %S = bitcast i32 1 to i32
\r
1001 %1 = zext i32 %S to i64
\r
1002 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1003 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1004 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1005 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
\r
1006 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1007 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1008 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
\r
1009 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1010 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
\r
1011 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1012 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
\r
1013 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1014 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
\r
1015 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1016 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
\r
1018 ; CHECK: test_sse2_psrl_1
\r
1019 ; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
\r
; Constant-folding chain over all six 256-bit logical right-shift intrinsics
; with a shift amount of 1. %S names the constant 1 via a no-op bitcast; %3 is
; the 128-bit count vector <zext(%S), 0> reused (through bitcasts) by psrl.w,
; psrl.d and psrl.q, and %S is then passed directly as the immediate to
; psrli.w, psrli.d and psrli.q. The CHECK lines (placed after the body) expect
; the whole chain to fold to one constant <4 x i64>.
1022 define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
\r
1023 %S = bitcast i32 1 to i32
\r
1024 %1 = zext i32 %S to i64
\r
1025 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1026 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1027 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1028 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1029 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1030 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1031 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
\r
1032 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1033 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
\r
1034 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1035 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
\r
1036 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1037 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
\r
1038 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1039 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
\r
1041 ; CHECK: test_avx2_psrl_1
\r
1042 ; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
\r
; Same six-intrinsic 128-bit logical right-shift chain as @test_sse2_psrl_1,
; but %S is 128, which is larger than every element width used (16, 32 and 64
; bits). The CHECK lines (placed after the body) expect the result to fold to
; zeroinitializer.
1045 define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
\r
1046 %S = bitcast i32 128 to i32
\r
1047 %1 = zext i32 %S to i64
\r
1048 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1049 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1050 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1051 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
\r
1052 %6 = bitcast <8 x i16> %5 to <4 x i32>
\r
1053 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1054 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
\r
1055 %9 = bitcast <4 x i32> %8 to <2 x i64>
\r
1056 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
\r
1057 %11 = bitcast <2 x i64> %10 to <8 x i16>
\r
1058 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
\r
1059 %13 = bitcast <8 x i16> %12 to <4 x i32>
\r
1060 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
\r
1061 %15 = bitcast <4 x i32> %14 to <2 x i64>
\r
1062 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
\r
1064 ; CHECK: test_sse2_psrl_0
\r
1065 ; CHECK: ret <2 x i64> zeroinitializer
\r
; Same six-intrinsic 256-bit logical right-shift chain as @test_avx2_psrl_1,
; but %S is 128, which is larger than every element width used (16, 32 and 64
; bits). The CHECK lines (placed after the body) expect the result to fold to
; zeroinitializer.
1068 define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
\r
1069 %S = bitcast i32 128 to i32
\r
1070 %1 = zext i32 %S to i64
\r
1071 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
\r
1072 %3 = insertelement <2 x i64> %2, i64 0, i32 1
\r
1073 %4 = bitcast <2 x i64> %3 to <8 x i16>
\r
1074 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
\r
1075 %6 = bitcast <16 x i16> %5 to <8 x i32>
\r
1076 %7 = bitcast <2 x i64> %3 to <4 x i32>
\r
1077 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
\r
1078 %9 = bitcast <8 x i32> %8 to <4 x i64>
\r
1079 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
\r
1080 %11 = bitcast <4 x i64> %10 to <16 x i16>
\r
1081 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
\r
1082 %13 = bitcast <16 x i16> %12 to <8 x i32>
\r
1083 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
\r
1084 %15 = bitcast <8 x i32> %14 to <4 x i64>
\r
1085 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
\r
1087 ; CHECK: test_avx2_psrl_0
\r
1088 ; CHECK: ret <4 x i64> zeroinitializer
\r
; Declarations of the x86 shift intrinsics exercised by the tests in this
; file. Every declaration references attribute group #1, defined at the end.
; Names ending in `i` before the element suffix (pslli/psrli/psrai) take an
; i32 shift amount; the others (psll/psrl/psra) take a 128-bit vector count.

; Left shifts (AVX2 256-bit, then SSE2 128-bit).
1091 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
\r
1092 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
\r
1093 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
\r
1094 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
\r
1095 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
\r
1096 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
\r
1097 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
\r
1098 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
\r
1099 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
\r
1100 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
\r
1101 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
\r
1102 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
\r
; Logical right shifts (AVX2 256-bit, then SSE2 128-bit).
1104 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
\r
1105 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
\r
1106 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
\r
1107 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
\r
1108 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
\r
1109 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
\r
1110 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
\r
1111 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
\r
1112 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
\r
1113 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
\r
1114 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
\r
1115 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
\r
; Arithmetic right shifts (w and d element sizes only are declared here).
1117 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
\r
1118 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
\r
1119 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
\r
1120 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
\r
1121 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
\r
1122 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
\r
1123 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
\r
1124 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
\r
; Attribute group shared by all intrinsic declarations above.
1126 attributes #1 = { nounwind readnone }
\r