; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
; AVX2 Logical Shift Left
; A shift-left of i16 lanes by 0 is a no-op and must be folded away entirely.
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: vpsllw $0, %ymm0, %ymm0
; CHECK: ret
; A shift-left of i16 lanes by 1 should be selected as an add (x + x).
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_2:
; CHECK: vpaddw %ymm0, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i16 shift amount uses the immediate form of vpsllw.
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_3:
; CHECK: vpsllw $15, %ymm0, %ymm0
; CHECK: ret
; A shift-left of i32 lanes by 0 is a no-op and must be folded away entirely.
define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: vpslld $0, %ymm0, %ymm0
; CHECK: ret
; A shift-left of i32 lanes by 1 should be selected as an add (x + x).
define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_2:
; CHECK: vpaddd %ymm0, %ymm0, %ymm0
; CHECK: ret
; A splatted scalar shift amount should use the xmm-count register form of
; vpslld (the count lives in %xmm0, the data vector is materialized in %ymm1).
define <8 x i32> @test_vpslld_var(i32 %shift) {
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}

; CHECK-LABEL: test_vpslld_var:
; CHECK: vpslld %xmm0, %ymm1, %ymm0
; CHECK: ret
; Maximum in-range i32 shift amount uses the immediate form of vpslld.
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_3:
; CHECK: vpslld $31, %ymm0, %ymm0
; CHECK: ret
; A shift-left of i64 lanes by 0 is a no-op and must be folded away entirely.
define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: vpsllq $0, %ymm0, %ymm0
; CHECK: ret
; A shift-left of i64 lanes by 1 should be selected as an add (x + x).
define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_2:
; CHECK: vpaddq %ymm0, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i64 shift amount uses the immediate form of vpsllq.
define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: vpsllq $63, %ymm0, %ymm0
; CHECK: ret
; AVX2 Arithmetic Shift
; An arithmetic shift of i16 lanes by 0 is a no-op and must be folded away.
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: vpsraw $0, %ymm0, %ymm0
; CHECK: ret
; Arithmetic shift of i16 lanes by 1 uses the immediate form of vpsraw.
define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_2:
; CHECK: vpsraw $1, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i16 arithmetic shift uses the immediate form of vpsraw.
define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_3:
; CHECK: vpsraw $15, %ymm0, %ymm0
; CHECK: ret
; An arithmetic shift of i32 lanes by 0 is a no-op and must be folded away.
define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: vpsrad $0, %ymm0, %ymm0
; CHECK: ret
; Arithmetic shift of i32 lanes by 1 uses the immediate form of vpsrad.
define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_2:
; CHECK: vpsrad $1, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i32 arithmetic shift uses the immediate form of vpsrad.
define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_3:
; CHECK: vpsrad $31, %ymm0, %ymm0
; CHECK: ret
; AVX2 Logical Shift Right
; A logical shift of i16 lanes by 0 is a no-op and must be folded away.
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: vpsrlw $0, %ymm0, %ymm0
; CHECK: ret
; Logical shift of i16 lanes by 1 uses the immediate form of vpsrlw.
define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: vpsrlw $1, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i16 logical shift uses the immediate form of vpsrlw.
define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: vpsrlw $15, %ymm0, %ymm0
; CHECK: ret
; A logical shift of i32 lanes by 0 is a no-op and must be folded away.
define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: vpsrld $0, %ymm0, %ymm0
; CHECK: ret
; Logical shift of i32 lanes by 1 uses the immediate form of vpsrld.
define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_2:
; CHECK: vpsrld $1, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i32 logical shift uses the immediate form of vpsrld.
define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_3:
; CHECK: vpsrld $31, %ymm0, %ymm0
; CHECK: ret
; A logical shift of i64 lanes by 0 is a no-op and must be folded away.
define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: vpsrlq $0, %ymm0, %ymm0
; CHECK: ret
; Logical shift of i64 lanes by 1 uses the immediate form of vpsrlq.
define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: vpsrlq $1, %ymm0, %ymm0
; CHECK: ret
; Maximum in-range i64 logical shift uses the immediate form of vpsrlq.
define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: vpsrlq $63, %ymm0, %ymm0
; CHECK: ret
; The truncate of the masked i64 shift amounts should fold into the variable
; per-lane shift, selecting vpsrlvd on the i32 vector directly.
; NOTE(review): the `vpand` and `ret` check lines below were reconstructed from
; the original line-number gaps — confirm against the upstream test.
; CHECK-LABEL: @srl_trunc_and_v4i64
; CHECK: vpand
; CHECK-NEXT: vpsrlvd
; CHECK: ret
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %sra = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %sra
}
; Vectorized byte shifts
; AVX2 has no variable per-element i16 shift: the lanes are zero-extended to
; i32, shifted with vpsllvd, and the low words are packed back down.
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: shl_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %shl = shl <8 x i16> %r, %a
  ret <8 x i16> %shl
}
; 256-bit variable i16 shift: lo/hi halves are interleaved up to i32 lanes,
; shifted with vpsllvd, shifted back down, and re-packed with vpackusdw.
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: shl_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK: retq
  %shl = shl <16 x i16> %r, %a
  ret <16 x i16> %shl
}
; Variable arithmetic i16 shift: sign-extend the data (vpmovsxwd), zero-extend
; the counts, shift with vpsravd, then pack the low words back down.
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: ashr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %ashr = ashr <8 x i16> %r, %a
  ret <8 x i16> %ashr
}
; 256-bit variable arithmetic i16 shift via interleave-to-i32 + vpsravd,
; re-packed with vpackusdw.
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: ashr_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK: retq
  %ashr = ashr <16 x i16> %r, %a
  ret <16 x i16> %ashr
}
; Variable logical i16 shift: zero-extend both operands to i32 lanes, shift
; with vpsrlvd, then pack the low words back down.
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: lshr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %lshr = lshr <8 x i16> %r, %a
  ret <8 x i16> %lshr
}
; 256-bit variable logical i16 shift via interleave-to-i32 + vpsrlvd,
; re-packed with vpackusdw.
; NOTE(review): SOURCE was truncated inside this function; the trailing ret/`}`
; were reconstructed from the %lshr result — confirm against upstream.
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: lshr_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK: retq
  %lshr = lshr <16 x i16> %r, %a
  ret <16 x i16> %lshr
}