; RUN: opt < %s -instcombine -S | FileCheck %s
; Checks how instcombine simplifies calls to the x86 SSE2/AVX2 logical shift
; intrinsics (psrl/psll and their immediate forms psrli/pslli) when the shift
; count is a constant.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
\r
; sse2.psrli.w with immediate count 0: expected to fold to the unmodified input %v.
define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}
\r
; sse2.psrli.w with immediate count 15: expected to become a plain lshr by a splat of 15.
define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}
\r
; sse2.psrli.w with immediate count 64 (not less than the 16-bit element width):
; expected to fold to zero.
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}
\r
; sse2.psrli.d with immediate count 0: expected to fold to the unmodified input %v.
define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}
\r
; sse2.psrli.d with immediate count 15: expected to become a plain lshr by a splat of 15.
define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}
\r
; sse2.psrli.d with immediate count 64 (not less than the 32-bit element width):
; expected to fold to zero.
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}
\r
; sse2.psrli.q with immediate count 0: expected to fold to the unmodified input %v.
define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}
\r
; sse2.psrli.q with immediate count 15: expected to become a plain lshr by a splat of 15.
define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}
\r
; sse2.psrli.q with immediate count 64 (not less than the 64-bit element width):
; expected to fold to zero.
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}
\r
; avx2.psrli.w with immediate count 0: expected to fold to the unmodified input %v.
define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}
\r
; avx2.psrli.w with immediate count 15: expected to become a plain lshr by a splat of 15.
define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}
\r
; avx2.psrli.w with immediate count 64 (not less than the 16-bit element width):
; expected to fold to zero.
define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}
\r
; avx2.psrli.d with immediate count 0: expected to fold to the unmodified input %v.
define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}
\r
; avx2.psrli.d with immediate count 15: expected to become a plain lshr by a splat of 15.
define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}
\r
; avx2.psrli.d with immediate count 64 (not less than the 32-bit element width):
; expected to fold to zero.
define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}
\r
; avx2.psrli.q with immediate count 0: expected to fold to the unmodified input %v.
define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}
\r
; avx2.psrli.q with immediate count 15: expected to become a plain lshr by a splat of 15.
define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}
\r
; avx2.psrli.q with immediate count 64 (not less than the 64-bit element width):
; expected to fold to zero.
define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}
\r
; sse2.pslli.w with immediate count 0: expected to fold to the unmodified input %v.
define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}
\r
; sse2.pslli.w with immediate count 15: expected to become a plain shl by a splat of 15.
define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}
\r
; sse2.pslli.w with immediate count 64 (not less than the 16-bit element width):
; expected to fold to zero.
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}
\r
; sse2.pslli.d with immediate count 0: expected to fold to the unmodified input %v.
define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}
\r
; sse2.pslli.d with immediate count 15: expected to become a plain shl by a splat of 15.
define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}
\r
; sse2.pslli.d with immediate count 64 (not less than the 32-bit element width):
; expected to fold to zero.
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}
\r
; sse2.pslli.q with immediate count 0: expected to fold to the unmodified input %v.
define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}
\r
; sse2.pslli.q with immediate count 15: expected to become a plain shl by a splat of 15.
define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}
\r
; sse2.pslli.q with immediate count 64 (not less than the 64-bit element width):
; expected to fold to zero.
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}
\r
; avx2.pslli.w with immediate count 0: expected to fold to the unmodified input %v.
define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}
\r
; avx2.pslli.w with immediate count 15: expected to become a plain shl by a splat of 15.
define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}
\r
; avx2.pslli.w with immediate count 64 (not less than the 16-bit element width):
; expected to fold to zero.
define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}
\r
; avx2.pslli.d with immediate count 0: expected to fold to the unmodified input %v.
define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}
\r
; avx2.pslli.d with immediate count 15: expected to become a plain shl by a splat of 15.
define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}
\r
; avx2.pslli.d with immediate count 64 (not less than the 32-bit element width):
; expected to fold to zero.
define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}
\r
; avx2.pslli.q with immediate count 0: expected to fold to the unmodified input %v.
define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}
\r
; avx2.pslli.q with immediate count 15: expected to become a plain shl by a splat of 15.
define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}
\r
; avx2.pslli.q with immediate count 64 (not less than the 64-bit element width):
; expected to fold to zero.
define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}
\r
; LSHR - Constant Vector (the shift count is supplied as a constant vector operand)
\r
; sse2.psrl.w with an all-zero count vector: expected to fold to the unmodified input %v.
define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}
\r
; sse2.psrl.w with count vector <15, 0, 0, 0, 9999, 9999, 9999, 9999>: expected to
; become a plain lshr by a splat of 15, i.e. the 9999 elements do not affect the
; expected result.
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15
; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; sse2.psrl.w with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}
\r
; sse2.psrl.w with 64 in element 0 of the count vector: expected to fold to zero.
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; sse2.psrl.d with an all-zero count vector: expected to fold to the unmodified input %v.
define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}
\r
; sse2.psrl.d with count vector <15, 0, 9999, 9999>: expected to become a plain
; lshr by a splat of 15, i.e. the 9999 elements do not affect the expected result.
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15
; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; sse2.psrl.d with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}
\r
; sse2.psrl.d with 64 in element 0 of the count vector: expected to fold to zero.
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; sse2.psrl.q with an all-zero count vector: expected to fold to the unmodified input %v.
define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}
\r
; sse2.psrl.q with count vector <15, 9999>: expected to become a plain lshr by a
; splat of 15, i.e. the 9999 element does not affect the expected result.
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_15
; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}
\r
; sse2.psrl.q with 64 in element 0 of the count vector: expected to fold to zero.
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}
\r
; avx2.psrl.w with an all-zero count vector: expected to fold to the unmodified input %v.
define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}
\r
; avx2.psrl.w with count vector <15, 0, 0, 0, 9999, 9999, 9999, 9999>: expected to
; become a plain lshr by a splat of 15, i.e. the 9999 elements do not affect the
; expected result.
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15
; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; avx2.psrl.w with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}
\r
; avx2.psrl.w with 64 in element 0 of the count vector: expected to fold to zero.
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; avx2.psrl.d with an all-zero count vector: expected to fold to the unmodified input %v.
define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}
\r
; avx2.psrl.d with count vector <15, 0, 9999, 9999>: expected to become a plain
; lshr by a splat of 15, i.e. the 9999 elements do not affect the expected result.
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15
; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; avx2.psrl.d with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}
\r
; avx2.psrl.d with 64 in element 0 of the count vector: expected to fold to zero.
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; avx2.psrl.q with an all-zero count vector: expected to fold to the unmodified input %v.
define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}
\r
; avx2.psrl.q with count vector <15, 9999>: expected to become a plain lshr by a
; splat of 15, i.e. the 9999 element does not affect the expected result.
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_15
; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}
\r
; avx2.psrl.q with 64 in element 0 of the count vector: expected to fold to zero.
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}
\r
; SHL - Constant Vector (the shift count is supplied as a constant vector operand)
\r
; sse2.psll.w with an all-zero count vector: expected to fold to the unmodified input %v.
define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_0
; CHECK-NEXT: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}
\r
; sse2.psll.w with count vector <15, 0, 0, 0, 9999, 9999, 9999, 9999>: expected to
; become a plain shl by a splat of 15, i.e. the 9999 elements do not affect the
; expected result.
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15
; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; sse2.psll.w with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15_splat
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}
\r
; sse2.psll.w with 64 in element 0 of the count vector: expected to fold to zero.
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_64
; CHECK-NEXT: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; sse2.psll.d with an all-zero count vector: expected to fold to the unmodified input %v.
define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_0
; CHECK-NEXT: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}
\r
; sse2.psll.d with count vector <15, 0, 9999, 9999>: expected to become a plain
; shl by a splat of 15, i.e. the 9999 elements do not affect the expected result.
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15
; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; sse2.psll.d with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15_splat
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}
\r
; sse2.psll.d with 64 in element 0 of the count vector: expected to fold to zero.
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_64
; CHECK-NEXT: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; sse2.psll.q with an all-zero count vector: expected to fold to the unmodified input %v.
define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_0
; CHECK-NEXT: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}
\r
; sse2.psll.q with count vector <15, 9999>: expected to become a plain shl by a
; splat of 15, i.e. the 9999 element does not affect the expected result.
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_15
; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}
\r
; sse2.psll.q with 64 in element 0 of the count vector: expected to fold to zero.
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_64
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}
\r
; avx2.psll.w with an all-zero count vector: expected to fold to the unmodified input %v.
define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_0
; CHECK-NEXT: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}
\r
; avx2.psll.w with count vector <15, 0, 0, 0, 9999, 9999, 9999, 9999>: expected to
; become a plain shl by a splat of 15, i.e. the 9999 elements do not affect the
; expected result.
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15
; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; avx2.psll.w with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15_splat
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}
\r
; avx2.psll.w with 64 in element 0 of the count vector: expected to fold to zero.
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_64
; CHECK-NEXT: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; avx2.psll.d with an all-zero count vector: expected to fold to the unmodified input %v.
define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_0
; CHECK-NEXT: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}
\r
; avx2.psll.d with count vector <15, 0, 9999, 9999>: expected to become a plain
; shl by a splat of 15, i.e. the 9999 elements do not affect the expected result.
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15
; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; avx2.psll.d with a splat-of-15 count vector: expected to fold to zero.
; NOTE(review): presumably the low 64 bits of the count vector are read as one
; integer, which makes this count out of range - confirm against the intrinsic.
define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15_splat
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}
\r
; avx2.psll.d with 64 in element 0 of the count vector: expected to fold to zero.
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_64
; CHECK-NEXT: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; avx2.psll.q with an all-zero count vector: expected to fold to the unmodified input %v.
define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_0
; CHECK-NEXT: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}
\r
; avx2.psll.q with count vector <15, 9999>: expected to become a plain shl by a
; splat of 15, i.e. the 9999 element does not affect the expected result.
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_15
; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}
\r
; avx2.psll.q with 64 in element 0 of the count vector: expected to fold to zero.
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_64
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}
\r
; Constant-folding chain of SSE2 left shifts by 1. A constant <8 x i16> vector is
; passed through psll.w, psll.d and psll.q (count vector %3 = <1, 0>, built from
; %S = 1) and then pslli.w, pslli.d and pslli.q (immediate %S), with bitcasts
; between element widths. The whole chain is expected to fold to one constant.
define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_1
; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
\r
; Constant-folding chain of AVX2 left shifts by 1. A constant <16 x i16> vector is
; passed through psll.w, psll.d and psll.q (count vector %3 = <1, 0>, built from
; %S = 1) and then pslli.w, pslli.d and pslli.q (immediate %S), with bitcasts
; between element widths. The whole chain is expected to fold to one constant.
define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_1
; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
\r
; Same SSE2 left-shift chain as the count-1 variant, but with %S = 128 used both
; as element 0 of the count vector and as the immediate count. The whole chain is
; expected to fold to zero.
define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_0
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
\r
; Same AVX2 left-shift chain as the count-1 variant, but with %S = 128 used both
; as element 0 of the count vector and as the immediate count. The whole chain is
; expected to fold to zero.
define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_0
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
\r
; Constant-folding chain of SSE2 logical right shifts by 1. A constant <8 x i16>
; vector is passed through psrl.w, psrl.d and psrl.q (count vector %3 = <1, 0>,
; built from %S = 1) and then psrli.w, psrli.d and psrli.q (immediate %S), with
; bitcasts between element widths. The whole chain is expected to fold to one
; constant.
define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_psrl_1
; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
\r
; Constant-folding chain of AVX2 logical right shifts by 1. A constant <16 x i16>
; vector is passed through psrl.w, psrl.d and psrl.q (count vector %3 = <1, 0>,
; built from %S = 1) and then psrli.w, psrli.d and psrli.q (immediate %S), with
; bitcasts between element widths. The whole chain is expected to fold to one
; constant.
define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_psrl_1
; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
\r
; SSE2 logical right-shift chain with %S = 128 used both as element 0 of the count
; vector and as the immediate count. The whole chain is expected to fold to zero.
define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_psrl_0
; CHECK-NEXT: ret <2 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
\r
; AVX2 logical right-shift chain with %S = 128 used both as element 0 of the count
; vector and as the immediate count. The whole chain is expected to fold to zero.
define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_psrl_0
; CHECK-NEXT: ret <4 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
\r
; Declarations of the left-shift intrinsics called by the tests in this file.
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

; Declarations of the logical right-shift intrinsics called by the tests in this file.
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

; Attribute group referenced as #1 by every declaration above.
attributes #1 = { nounwind readnone }
\r