; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <16 x i32> %a, %b
  ret <16 x i32> %shift
}

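; With AVX512DQ only (no AVX512BW) there is no v32i16 variable shift, so each
; 256-bit half is zero-extended to 32-bit lanes, shifted with vpsrlvd, and
; repacked with vpsrld/vpackusdw. AVX512BW handles the whole vector with a
; single vpsrlvw.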
define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX512DQ-NEXT: vpsrlvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = lshr <32 x i16> %a, %b
  ret <32 x i16> %shift
}

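; There is no per-byte variable shift even with AVX512, so v64i8 is lowered per
; 256-bit half: vpsllw $5 moves the shift amount into the byte sign bits and
; vpblendvb selects between the original value and copies shifted right by 4,
; 2 and 1, with vpand clearing the bits shifted in across byte boundaries.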
define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm4
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm4
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
  %shift = lshr <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

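; Uniform (splatted) shift amounts are taken from the low element of an xmm
; register, so the count-in-xmm forms of vpsrlq/vpsrld/vpsrlw can be used; the
; v16i32 and v32i16 cases first isolate or zero-extend that low element.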
define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = lshr <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT: vmovss %xmm1, %xmm2, %xmm1
; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
; ALL-NEXT: retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = lshr <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ-NEXT: vmovd %xmm2, %eax
; AVX512DQ-NEXT: movzwl %ax, %eax
; AVX512DQ-NEXT: vmovd %eax, %xmm2
; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: movzwl %ax, %eax
; AVX512BW-NEXT: vmovd %eax, %xmm1
; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = lshr <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

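; Splatted byte shifts still have no direct instruction; the amount is
; broadcast with vpbroadcastb and the same blendv ladder is reused, so the
; blend masks (ymm2, ymm6, ymm8) are computed once and shared by both halves.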
define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm3
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm3
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm3
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm3
; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = lshr <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

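; For constant, non-uniform amounts the shift vector is loaded from the
; constant pool, which the checks match as a {{.*}}(%rip) memory operand on the
; variable-shift instructions.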
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

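; As in the variable case, AVX512DQ widens each 256-bit half of v32i16 to
; 32-bit lanes, here unpacking against a constant amount vector, while
; AVX512BW folds the constant amounts straight into a vpsrlvw memory operand.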
define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm5
; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsrld $16, %ymm3, %ymm3
; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT: vpackusdw %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
  %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

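; Splatted constant amounts can use the shift-by-immediate forms directly
; (vpsrlq $7, vpsrld $5, vpsrlw $3).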
define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL-NEXT: vpsrlq $7, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL-NEXT: vpsrld $5, %zmm0, %zmm0
; ALL-NEXT: retq
  %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

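; Bytes are shifted as words, so a vpand with 31 (0x1f) clears the three bits
; that leak in from the adjacent byte; AVX512BW applies the same mask to the
; full zmm register via vpandq.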
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}