; RUN: opt < %s -instcombine -S | FileCheck %s
; Exercises InstCombine on x86 SSE2/AVX2 logical shift intrinsics with constant shift counts.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_0
; CHECK: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_15
; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}
\r
; An immediate count of 64 is not less than the i16 element width, so the expected result is all zeros.
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_w_64
; CHECK: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_0
; CHECK: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_15
; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}
\r
; An immediate count of 64 is not less than the i32 element width, so the expected result is all zeros.
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_d_64
; CHECK: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_0
; CHECK: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_15
; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}
\r
; An immediate count of 64 is not less than the i64 element width, so the expected result is all zeros.
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrli_q_64
; CHECK: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_0
; CHECK: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_15
; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}
\r
; An immediate count of 64 is not less than the i16 element width, so the expected result is all zeros.
define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_w_64
; CHECK: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_0
; CHECK: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_15
; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}
\r
; An immediate count of 64 is not less than the i32 element width, so the expected result is all zeros.
define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_d_64
; CHECK: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_0
; CHECK: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}
\r
; An in-range immediate count (15) is expected to become a generic lshr by a splat of 15.
define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_15
; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}
\r
; An immediate count of 64 is not less than the i64 element width, so the expected result is all zeros.
define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrli_q_64
; CHECK: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_0
; CHECK: ret <8 x i16> %v
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_15
; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}
\r
; An immediate count of 64 is not less than the i16 element width, so the expected result is all zeros.
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_w_64
; CHECK: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_0
; CHECK: ret <4 x i32> %v
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_15
; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}
\r
; An immediate count of 64 is not less than the i32 element width, so the expected result is all zeros.
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_d_64
; CHECK: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_0
; CHECK: ret <2 x i64> %v
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_15
; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}
\r
; An immediate count of 64 is not less than the i64 element width, so the expected result is all zeros.
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_pslli_q_64
; CHECK: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_0
; CHECK: ret <16 x i16> %v
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_15
; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}
\r
; An immediate count of 64 is not less than the i16 element width, so the expected result is all zeros.
define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_w_64
; CHECK: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_0
; CHECK: ret <8 x i32> %v
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_15
; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}
\r
; An immediate count of 64 is not less than the i32 element width, so the expected result is all zeros.
define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_d_64
; CHECK: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}
\r
; A zero immediate shift count is a no-op: the function is expected to return %v unchanged.
define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_0
; CHECK: ret <4 x i64> %v
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}
\r
; An in-range immediate count (15) is expected to become a generic shl by a splat of 15.
define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_15
; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}
\r
; An immediate count of 64 is not less than the i64 element width, so the expected result is all zeros.
define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_pslli_q_64
; CHECK: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}
\r
; LSHR - Constant Vector
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_15
; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i16 element
; width; the expected result is all zeros.
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_w_64
; CHECK: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_15
; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i32 element
; width; the expected result is all zeros.
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_d_64
; CHECK: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lane is filler.
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_15
; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i64 element
; width; the expected result is all zeros.
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psrl_q_64
; CHECK: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_15
; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i16 element
; width; the expected result is all zeros.
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_w_64
; CHECK: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_15
; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i32 element
; width; the expected result is all zeros.
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_d_64
; CHECK: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; Vector-count form: the expected output is a generic lshr by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lane is filler.
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_15
; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i64 element
; width; the expected result is all zeros.
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psrl_q_64
; CHECK: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}
\r
; SHL - Constant Vector
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_15
; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <8 x i16> %1
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i16 element
; width; the expected result is all zeros.
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_w_64
; CHECK: ret <8 x i16> zeroinitializer
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_15
; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <4 x i32> %1
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i32 element
; width; the expected result is all zeros.
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_d_64
; CHECK: ret <4 x i32> zeroinitializer
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lane is filler.
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_15
; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i64 element
; width; the expected result is all zeros.
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @sse2_psll_q_64
; CHECK: ret <2 x i64> zeroinitializer
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_15
; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK: ret <16 x i16> %1
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i16 element
; width; the expected result is all zeros.
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_w_64
; CHECK: ret <16 x i16> zeroinitializer
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lanes are filler.
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_15
; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK: ret <8 x i32> %1
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i32 element
; width; the expected result is all zeros.
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_d_64
; CHECK: ret <8 x i32> zeroinitializer
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}
\r
; Vector-count form: the expected output is a generic shl by a splat of 15, i.e. only the low
; 64 bits of the count operand (value 15) are expected to matter; the 9999 lane is filler.
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_15
; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK: ret <4 x i64> %1
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}
\r
; Vector-count form with a low 64-bit count of 64, which is not less than the i64 element
; width; the expected result is all zeros.
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
; CHECK-LABEL: @avx2_psll_q_64
; CHECK: ret <4 x i64> zeroinitializer
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}
\r
; Chains SSE2 psll.w/d/q (vector count) and pslli.w/d/q (scalar count) over a constant
; vector, every count being 1; the whole chain is expected to fold to one constant.
define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_1
; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
}
\r
; Chains AVX2 psll.w/d/q (vector count) and pslli.w/d/q (scalar count) over a constant
; vector, every count being 1; the whole chain is expected to fold to one constant.
define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_1
; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
}
\r
; Same SSE2 left-shift chain as the count-1 variant but with every count set to 128;
; the expected folded result is an all-zero vector.
define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_0
; CHECK: ret <2 x i64> zeroinitializer
}
\r
; Same AVX2 left-shift chain as the count-1 variant but with every count set to 128;
; the expected folded result is an all-zero vector.
define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_0
; CHECK: ret <4 x i64> zeroinitializer
}
\r
; Chains SSE2 psrl.w/d/q (vector count) and psrli.w/d/q (scalar count) over a constant
; vector, every count being 1; the whole chain is expected to fold to one constant.
define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_1
; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
}
\r
; Chains AVX2 psrl.w/d/q (vector count) and psrli.w/d/q (scalar count) over a constant
; vector, every count being 1; the whole chain is expected to fold to one constant.
define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_1
; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
}
\r
; SSE2 right-shift chain (psrl.w/d/q then psrli.w/d/q) with every count set to 128;
; the expected folded result is an all-zero vector.
define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
; CHECK: test_sse2_psrl_0
; CHECK: ret <2 x i64> zeroinitializer
}
\r
; AVX2 right-shift chain (psrl.w/d/q then psrli.w/d/q) with every count set to 128;
; the expected folded result is an all-zero vector.
define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  ; Build the <2 x i64> count operand: count in lane 0, zero in lane 1.
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
; CHECK: test_avx2_psrl_0
; CHECK: ret <4 x i64> zeroinitializer
}
\r
; Declarations of the x86 shift intrinsics called by the functions in this file.
; Left shifts: scalar-count (pslli.*) and vector-count (psll.*) forms.
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
; Logical right shifts: scalar-count (psrli.*) and vector-count (psrl.*) forms.
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

; Attribute group shared by every intrinsic declaration above.
attributes #1 = { nounwind readnone }
\r