1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK-SKX
; test1: one-input <16 x float> shuffle (second operand is undef) with an
; irregular mask; result lanes 2, 3, 5, 10 and 13 are undef.
7 define <16 x float> @test1(<16 x float> %a) nounwind {
8 %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
; test2: integer (<16 x i32>) counterpart of test1 - same one-input mask,
; with result lanes 2, 3, 5, 10 and 13 undef.
15 define <16 x i32> @test2(<16 x i32> %a) nounwind {
16 %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
; test3: one-input <8 x i64> shuffle with an irregular mask; result lanes 3
; and 5 are undef.
23 define <8 x i64> @test3(<8 x i64> %a) nounwind {
24 %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
; test4: one-input <8 x double> shuffle where only result lanes 0 and 1 are
; defined (they read elements 1 and 3 of %a); lanes 2-7 are undef.
31 define <8 x double> @test4(<8 x double> %a) nounwind {
32 %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; test5: two-input <8 x double> shuffle. Mask indices 8 and 10 (result lanes
; 1 and 5) select elements 0 and 2 of %b; every other lane reads %a.
39 define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
40 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
; test6: one-input <8 x i64> shuffle that applies the same 2,3,1,0 pattern
; to elements 0-3 and, offset by 4, to elements 4-7.
47 define <8 x i64> @test6(<8 x i64> %a) nounwind {
48 %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
; test7: integer (<8 x i64>) counterpart of test5 - same two-input mask, with
; result lanes 1 and 5 reading elements 0 and 2 of %b.
55 define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
56 %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
; test8: two-input <16 x i32> shuffle. Even result lanes read %a (mask
; indices below 16) and odd result lanes read %b (mask indices 16-31).
63 define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
64 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
; test9: floating-point (<16 x float>) counterpart of test8 - same mask, even
; result lanes from %a and odd result lanes from %b.
71 define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
72 %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
; test10: same two-input mask as test9, but the second shuffle operand is
; loaded from memory through the pointer argument %b.
76 ; CHECK-LABEL: test10:
79 define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
80 %c = load <16 x float>, <16 x float>* %b
81 %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
; test11: integer (<16 x i32>) counterpart of test10 - the second shuffle
; operand is loaded from memory through the pointer argument %b.
85 ; CHECK-LABEL: test11:
88 define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
89 %c = load <16 x i32>, <16 x i32>* %b
90 %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
95 ; CHECK: vpermilps $177, %zmm
; test13: one-input <16 x float> shuffle that swaps adjacent elements. Within
; every group of four the pattern is 1,0,3,2; packing those 2-bit selectors
; from the lowest lane upward gives 0b10110001 = 0xB1 = 177, the immediate
; expected above.
97 define <16 x float> @test13(<16 x float> %a) {
98 %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; test14: one-input <8 x double> shuffle that stays within each pair of
; elements. The immediate has one bit per result lane, set when the mask
; picks the odd element of the pair: lanes 0, 1, 3, 6 and 7 give
; 1 + 2 + 8 + 64 + 128 = 203.
102 ; CHECK-LABEL: test14
103 ; CHECK: vpermilpd $203, %zmm
105 define <8 x double> @test14(<8 x double> %a) {
106 %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
; test15: integer (<16 x i32>) counterpart of test13 - adjacent elements are
; swapped, i.e. the pattern 1,0,3,2 repeats in every group of four.
110 ; CHECK-LABEL: test15
111 ; CHECK: vpshufd $177, %zmm
113 define <16 x i32> @test15(<16 x i32> %a) {
114 ; mask 1-0-3-2 = 10110001 = 0xb1 = 177
; (the 2-bit selectors are packed starting from the lowest result lane)
115 %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; test16: two-input <8 x double> shuffle whose mask 2..9 selects eight
; consecutive elements, starting at element 2, from %a followed by %b. The
; starting offset is the immediate 2 of the expected align instruction.
118 ; CHECK-LABEL: test16
119 ; CHECK: valignq $2, %zmm0, %zmm1
121 define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
122 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
; test17: two-input <8 x double> shuffle where even result lanes read %a and
; odd result lanes read %b, each from its own pair of elements; lanes 5-7 are
; undef. The immediate has one bit per result lane, set when the odd element
; of the pair is chosen: lanes 0, 1 and 4 give 1 + 2 + 16 = 19.
126 ; CHECK-LABEL: test17
127 ; CHECK: vshufpd $19, %zmm1, %zmm0
129 define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
130 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
; test18: two-input <16 x i32> shuffle that, in every group of four elements,
; interleaves elements 2 and 3 of %a with elements 2 and 3 of %c
; (a2, c2, a3, c3) - the "unpack high" pattern.
134 ; CHECK-LABEL: test18
135 ; CHECK: vpunpckhdq %zmm
137 define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
138 %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
; test19: two-input <16 x i32> shuffle that, in every group of four elements,
; interleaves elements 0 and 1 of %a with elements 0 and 1 of %c
; (a0, c0, a1, c1) - the "unpack low" pattern.
142 ; CHECK-LABEL: test19
143 ; CHECK: vpunpckldq %zmm
145 define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
146 %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; test20: two-input <8 x i64> shuffle that takes the odd element of every
; pair, alternating between %a and %c (a1, c1, a3, c3, ...).
150 ; CHECK-LABEL: test20
151 ; CHECK: vpunpckhqdq %zmm
153 define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
154 %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; test21: splat of element 0 of %a across all eight <8 x double> lanes. The
; mask never references %b, so the second operand is unused.
158 ; CHECK-LABEL: test21
159 ; CHECK: vbroadcastsd %xmm0, %zmm
161 define <8 x double> @test21(<8 x double> %a, <8 x double> %b) {
162 %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
163 ret <8 x double> %shuffle
; test22: integer (<8 x i64>) counterpart of test21 - splat of element 0 of
; %a; the mask never references %b.
166 ; CHECK-LABEL: test22
167 ; CHECK: vpbroadcastq %xmm0, %zmm
169 define <8 x i64> @test22(<8 x i64> %a, <8 x i64> %b) {
170 %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
171 ret <8 x i64> %shuffle
; test23: two-input <16 x i32> shuffle where only result lanes 0-3 are
; defined: elements 0, 1, 2 of %a followed by element 3 of %b (mask index 19).
174 ; CHECK-LABEL: @test23
178 define <16 x i32> @test23(<16 x i32> %a, <16 x i32> %b) nounwind {
179 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; test24: like test23 but with one more defined lane: lanes 0-2 read %a,
; lane 3 reads element 3 of %b (index 19) and lane 4 reads element 9 of %b
; (index 25), which lies in a different group of four than the result lane.
183 ; CHECK-LABEL: @test24
186 define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
187 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 25, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; test25: two-input <16 x i32> shuffle with a per-group-of-four pattern: the
; first two lanes read elements 0 and 1 of %a, the last two read elements 3
; and 0 of %b. Some lanes are undef in the later groups.
191 ; CHECK-LABEL: @test25
194 define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
195 ; mask - 0-1-3-0 00110100 = 0x34 = 52
; (2-bit selectors 0,1,3,0 packed starting from the lowest result lane)
196 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 16, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
; test26: one-input <16 x i32> shuffle in which every defined result lane
; reads the odd element of its pair (1,1,3,3,5,5,...), i.e. odd elements are
; duplicated; lanes 7, 10, 13, 14 and 15 are undef.
200 ; CHECK-LABEL: @test26
203 define <16 x i32> @test26(<16 x i32> %a) nounwind {
204 %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
; test27: widening shuffle from <4 x i32> to <16 x i32>. Only result lanes 0
; and 1 are defined (elements 0 and 1 of %a); lanes 2-15 are undef.
208 ; CHECK-LABEL: @test27
210 define <16 x i32> @test27(<4 x i32>%a) {
211 %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; test28: one-input <16 x i16> shuffle. In each group of eight elements the
; first four stay in place and the last four are swapped pairwise (pattern
; 1,0,3,2 relative to the upper four, which encodes as 0xB1 = 177).
215 ; CHECK-LABEL: test28
216 ; CHECK: vpshufhw $177, %ymm
218 define <16 x i16> @test28(<16 x i16> %a) {
219 %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32><i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
; test29: floating-point (<16 x float>) counterpart of test19 - in every
; group of four elements, elements 0 and 1 of %a are interleaved with
; elements 0 and 1 of %c (a0, c0, a1, c1).
223 ; CHECK-LABEL: test29
224 ; CHECK: vunpcklps %zmm
226 define <16 x float> @test29(<16 x float> %a, <16 x float> %c) {
227 %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; test30: two-input <16 x float> shuffle with a per-group-of-four pattern:
; lanes 0 and 1 both read element 0 of %a, lanes 2 and 3 read elements 1 and
; 2 of %c. The 2-bit selectors 0,0,1,2 packed from the lowest lane upward
; give 0b10010000 = 0x90 = 144.
231 ; CHECK-LABEL: @test30
232 ; CHECK: vshufps $144, %zmm
234 define <16 x float> @test30(<16 x float> %a, <16 x float> %c) {
235 %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
; test31: two-input <16 x i32> shuffle whose mask 3..18 selects sixteen
; consecutive elements, starting at element 3, from %a followed by %b
; (result lane 3 is undef). The starting offset is the immediate 3.
239 ; CHECK-LABEL: test31
240 ; CHECK: valignd $3, %zmm0, %zmm1
242 define <16 x i32> @test31(<16 x i32> %a, <16 x i32> %b) nounwind {
243 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
; test32: two-input <8 x double> shuffle where even result lanes read %b and
; odd result lanes read %a (the reverse of test17, matching the swapped
; register order expected below); lanes 4 and 7 are undef. One immediate bit
; per result lane is set when the odd element of the pair is chosen: lanes
; 0, 1, 5 and 6 give 1 + 2 + 32 + 64 = 99.
247 ; CHECK-LABEL: test32
248 ; CHECK: vshufpd $99, %zmm0, %zmm1
250 define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
251 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 1, i32 10, i32 2, i32 undef, i32 5, i32 15, i32 undef>
; test_align_v16i32_rr: register-register form of the element-align shuffle.
; The mask 3..18 selects sixteen consecutive elements, starting at element 3,
; from %a followed by %b (result lane 3 is undef); both inputs are arguments.
255 define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
256 ; CHECK-LABEL: test_align_v16i32_rr:
258 ; CHECK-NEXT: valignd $3, %zmm0, %zmm1, %zmm0
260 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
; test_align_v16i32_rm: register-memory form of the element-align shuffle.
; The first shuffle operand %a is loaded from %a.ptr, and the expected
; instruction uses (%rdi) directly as its memory operand.
264 define <16 x i32> @test_align_v16i32_rm(<16 x i32>* %a.ptr, <16 x i32> %b) nounwind {
265 ; CHECK-LABEL: test_align_v16i32_rm:
267 ; CHECK-NEXT: valignd $3, (%rdi), %zmm0, %zmm0
269 %a = load <16 x i32>, <16 x i32>* %a.ptr
270 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
; test_align_v16i32_rm_mask: merge-masked element-align shuffle. The IR
; selects, per lane, between the shuffle result %c (mask bit set) and the
; loaded vector %a (mask bit clear).
274 define <16 x i32> @test_align_v16i32_rm_mask(<16 x i32>* %a.ptr, <16 x i32> %b, <16 x i1> %mask) nounwind {
275 ; CHECK-LABEL: test_align_v16i32_rm_mask:
; Default-prefix (-mcpu=knl) expectations: the <16 x i1> mask arrives in
; %xmm1 and is turned into %k1 via sign-extend, and-with-broadcast-constant
; and vptestmd; %a is then loaded into %zmm1 so the masked valignd can merge
; into it.
277 ; CHECK-NEXT: vpmovsxbd %xmm1, %zmm1
278 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
279 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
280 ; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
281 ; CHECK-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
282 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
; SKX-prefix (-mcpu=skx) expectations: the mask is converted to %k1 with a
; single vpmovb2m; the rest of the sequence is the same.
285 ; CHECK-SKX-LABEL: test_align_v16i32_rm_mask:
286 ; CHECK-SKX: ## BB#0:
287 ; CHECK-SKX-NEXT: vpmovb2m %xmm1, %k1
288 ; CHECK-SKX-NEXT: vmovdqa32 (%rdi), %zmm1
289 ; CHECK-SKX-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
290 ; CHECK-SKX-NEXT: vmovaps %zmm1, %zmm0
291 ; CHECK-SKX-NEXT: retq
292 %a = load <16 x i32>, <16 x i32>* %a.ptr
293 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
; Lanes with a clear mask bit keep the loaded %a (merge masking).
294 %res = select <16 x i1> %mask,<16 x i32> %c, <16 x i32> %a
; test_align_v8f64_rr: register-register element-align shuffle on
; <8 x double>. The mask 3..10 selects eight consecutive elements, starting
; at element 3, from %a followed by %b.
298 define <8 x double> @test_align_v8f64_rr(<8 x double> %a, <8 x double> %b) nounwind {
299 ; CHECK-LABEL: test_align_v8f64_rr:
301 ; CHECK-NEXT: valignq $3, %zmm0, %zmm1, %zmm0
303 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
; test_align_v18f64_rm: register-memory element-align shuffle on
; <8 x double>. The first shuffle operand %a is loaded from %a.ptr, and the
; expected instruction uses (%rdi) directly as its memory operand.
; NOTE(review): "v18f64" in the name looks like a typo for "v8f64" (the
; vectors are <8 x double>, cf. test_align_v8f64_rr). A rename would also
; have to update the label directive below, so it is left unchanged here.
307 define <8 x double> @test_align_v18f64_rm(<8 x double>* %a.ptr, <8 x double> %b) nounwind {
308 ; CHECK-LABEL: test_align_v18f64_rm:
310 ; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0
312 %a = load <8 x double>, <8 x double>* %a.ptr
313 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
; test_align_v18f64_rm_mask: zero-masked element-align shuffle on
; <8 x double>. The IR selects, per lane, between the shuffle result %c
; (mask bit set) and zero (mask bit clear).
; NOTE(review): "v18f64" in the name looks like a typo for "v8f64" (the
; vectors are <8 x double>, cf. test_align_v8f64_rr). A rename would also
; have to update both label directives below, so it is left unchanged here.
317 define <8 x double> @test_align_v18f64_rm_mask(<8 x double>* %a.ptr, <8 x double> %b, <8 x i1> %mask) nounwind {
318 ; CHECK-LABEL: test_align_v18f64_rm_mask:
; Default-prefix (-mcpu=knl) expectations: the <8 x i1> mask in %xmm1 is
; turned into %k1 via sign-extend, and-with-broadcast-constant and vptestmq;
; the load stays folded into the zero-masked ({z}) valignq.
320 ; CHECK-NEXT: vpmovsxwq %xmm1, %zmm1
321 ; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
322 ; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
323 ; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
; SKX-prefix (-mcpu=skx) expectations: the mask is converted to %k1 with a
; single vpmovw2m.
326 ; CHECK-SKX-LABEL: test_align_v18f64_rm_mask:
327 ; CHECK-SKX: ## BB#0:
328 ; CHECK-SKX-NEXT: vpmovw2m %xmm1, %k1
329 ; CHECK-SKX-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
330 ; CHECK-SKX-NEXT: retq
331 %a = load <8 x double>, <8 x double>* %a.ptr
332 %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
; Lanes with a clear mask bit become zero (zero masking).
333 %res = select <8 x i1> %mask,<8 x double> %c, <8 x double> zeroinitializer
334 ret <8 x double> %res