; Codegen test for <16 x i8> shufflevector lowering on x86-64. llc is invoked
; twice with -x86-experimental-vector-shuffle-lowering: once with the plain
; x86-64 CPU (FileCheck prefixes ALL and SSE2) and once with +ssse3 added
; (FileCheck prefixes ALL and SSSE3). Expectations written with the ALL prefix
; therefore have to hold for both invocations.
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3
; Module-level data layout (little-endian, leading "e") and target triple.
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-unknown"
; Splat: every one of the sixteen result lanes selects byte 0 of %a. %b is an
; operand of the shufflevector but no mask index refers to it.
;
; The FIXME-prefixed lines are not evaluated by either FileCheck invocation
; visible in this listing, because FIXME is not one of the --check-prefix
; values. NOTE(review): they read like a record of a shorter sequence the
; lowering should eventually produce - confirm against the commit history.
7 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
8 ; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
10 ; FIXME-NEXT: punpcklbw %xmm0, %xmm0
11 ; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
12 ; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
14 ; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; Without SSSE3: duplicate each low byte into a word, then splat word 0 through
; the dword / low-word / high-word shuffles.
16 ; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
18 ; SSE2-NEXT: punpcklbw %xmm0, %xmm0
19 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
20 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
21 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
; With SSSE3: xmm1 is zeroed and used as the pshufb control, so every lane
; reads byte 0 of xmm0.
24 ; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
26 ; SSSE3-NEXT: pxor %xmm1, %xmm1
27 ; SSSE3-NEXT: pshufb %xmm1, %xmm0
29 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
30 ret <16 x i8> %shuffle
; Lanes 0-7 select byte 0 of %a and lanes 8-15 select byte 1 of %a; %b is not
; referenced by the mask. The two runs have separate expectations: a four
; instruction unpack/shuffle sequence without SSSE3, a single pshufb with it.
33 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
34 ; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
36 ; SSE2-NEXT: punpcklbw %xmm0, %xmm0
37 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
38 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
39 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
42 ; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
44 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
46 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
47 ret <16 x i8> %shuffle
; Lanes 0-7 select byte 0 of %a and lanes 8-15 select byte 8 of %a; %b is not
; referenced by the mask. Without SSSE3 the expected sequence first moves the
; needed data into the low half (pshufd/pshuflw) before unpacking and
; splatting; with SSSE3 a single pshufb is expected.
50 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
51 ; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
53 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
54 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
55 ; SSE2-NEXT: punpcklbw %xmm0, %xmm0
56 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
57 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
60 ; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
62 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
64 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
65 ret <16 x i8> %shuffle
; Each of bytes 0, 1, 2 and 3 of %a is repeated four times in order; %b is not
; referenced by the mask. The expectations use the ALL prefix, so both runs
; must emit the same two self-unpacks (bytes, then low words).
68 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
69 ; ALL-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
71 ; ALL-NEXT: punpcklbw %xmm0, %xmm0
72 ; ALL-NEXT: punpcklwd %xmm0, %xmm0
74 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
75 ret <16 x i8> %shuffle
; Each of bytes 4, 5, 6 and 7 of %a is repeated four times in order; %b is not
; referenced by the mask. Both runs (ALL prefix) expect a low-byte self-unpack
; followed by a high-word self-unpack.
78 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
79 ; ALL-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
81 ; ALL-NEXT: punpcklbw %xmm0, %xmm0
82 ; ALL-NEXT: punpckhwd %xmm0, %xmm0
84 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
85 ret <16 x i8> %shuffle
; Bytes 0, 4, 8 and 12 of %a (the lowest byte of each 32-bit group) are each
; repeated four times; %b is not referenced by the mask. Without SSSE3 a six
; instruction sequence is expected, with SSSE3 a single pshufb.
88 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
89 ; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
91 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
92 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
93 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
94 ; SSE2-NEXT: punpcklbw %xmm0, %xmm0
95 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
96 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
99 ; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
101 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
103 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
104 ret <16 x i8> %shuffle
; Each of bytes 0-7 of %a appears twice in a row; %b is not referenced by the
; mask. Both runs (ALL prefix) expect a single punpcklbw of xmm0 with itself.
107 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
108 ; ALL-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
110 ; ALL-NEXT: punpcklbw %xmm0, %xmm0
112 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
113 ret <16 x i8> %shuffle
; The byte pair (0, 1) of %a is repeated eight times, i.e. the low 16-bit word
; of %a is replicated across the vector; %b is not referenced by the mask.
;
; The FIXME-prefixed lines are not evaluated by either FileCheck invocation
; visible in this listing (FIXME is not a --check-prefix value).
; NOTE(review): they look like the desired two-instruction form - confirm.
116 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
117 ; FIXME-LABEL: @shuffle_v16i8_0101010101010101
119 ; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
120 ; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
123 ; SSE2-LABEL: @shuffle_v16i8_0101010101010101
125 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
126 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
127 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
130 ; SSSE3-LABEL: @shuffle_v16i8_0101010101010101
132 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
134 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
135 ret <16 x i8> %shuffle
; Two-input interleave: even lanes take bytes 0-7 of %a and odd lanes take
; indices 16-23, which address bytes 0-7 of %b. Both runs (ALL prefix) expect
; a single punpcklbw of xmm1 into xmm0. The instruction check here is a plain
; match after the label rather than a -NEXT match.
138 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
139 ; ALL-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
140 ; ALL: punpcklbw %xmm1, %xmm0
142 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
143 ret <16 x i8> %shuffle
; Even lanes all take index 16 (byte 0 of %b); odd lanes take bytes 0-7 of %a
; in order. The two runs list identical expectations under their own prefixes:
; byte 0 of xmm1 is splatted across its low half, interleaved with xmm0, and
; the result is copied from xmm1 back into xmm0.
146 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
147 ; SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
149 ; SSE2-NEXT: punpcklbw %xmm1, %xmm1
150 ; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
151 ; SSE2-NEXT: punpcklbw %xmm0, %xmm1
152 ; SSE2-NEXT: movdqa %xmm1, %xmm0
155 ; SSSE3-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
157 ; SSSE3-NEXT: punpcklbw %xmm1, %xmm1
158 ; SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
159 ; SSSE3-NEXT: punpcklbw %xmm0, %xmm1
160 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
162 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
163 ret <16 x i8> %shuffle
; Reverses the byte order inside each group of four bytes of %a; %b is not
; referenced by the mask.
166 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
; Without SSSE3: both halves are widened against a zeroed xmm1, the words of
; each half are reversed in groups of four, and the halves are packed back.
167 ; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
169 ; SSE2-NEXT: pxor %xmm1, %xmm1
170 ; SSE2-NEXT: movdqa %xmm0, %xmm2
171 ; SSE2-NEXT: punpckhbw %xmm1, %xmm2
172 ; SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
173 ; SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
174 ; SSE2-NEXT: punpcklbw %xmm1, %xmm0
175 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
176 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
177 ; SSE2-NEXT: packuswb %xmm2, %xmm0
; With SSSE3: the recorded output builds an all-zero xmm1 with pshufb and ORs
; it into the shuffled xmm0.
; NOTE(review): the zero pshufb + por pair looks redundant for a mask that
; only reads %a - presumably a known inefficiency of this lowering; confirm.
180 ; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
182 ; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
183 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
184 ; SSSE3-NEXT: por %xmm1, %xmm0
186 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
187 ret <16 x i8> %shuffle
; Lanes 0-7 take bytes 0-7 of %a with each group of four reversed; lanes 8-15
; take indices 16-23 (bytes 0-7 of %b) with the same per-group reversal.
190 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; Without SSSE3: the low halves of both inputs are widened against a zeroed
; xmm2, reversed as words, and packed together.
191 ; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
193 ; SSE2-NEXT: pxor %xmm2, %xmm2
194 ; SSE2-NEXT: punpcklbw %xmm2, %xmm1
195 ; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
196 ; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
197 ; SSE2-NEXT: punpcklbw %xmm2, %xmm0
198 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
199 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
200 ; SSE2-NEXT: packuswb %xmm1, %xmm0
; With SSSE3: one pshufb per input places its bytes and zeroes the lanes owned
; by the other input, then the two results are ORed.
203 ; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
205 ; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
206 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
207 ; SSSE3-NEXT: por %xmm1, %xmm0
209 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
210 ret <16 x i8> %shuffle
; Alternating four-byte groups from the two inputs, each group reversed:
; lanes 0-3 = %a bytes 3..0, lanes 4-7 = indices 31..28 (%b bytes 15..12),
; lanes 8-11 = %a bytes 11..8, lanes 12-15 = indices 23..20 (%b bytes 7..4).
213 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; Without SSSE3: halves of both inputs are widened against a zeroed xmm2,
; reversed as words, blended pairwise with shufpd, and packed.
214 ; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
216 ; SSE2-NEXT: pxor %xmm2, %xmm2
217 ; SSE2-NEXT: movdqa %xmm1, %xmm3
218 ; SSE2-NEXT: punpcklbw %xmm2, %xmm3
219 ; SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
220 ; SSE2-NEXT: movdqa %xmm0, %xmm4
221 ; SSE2-NEXT: punpckhbw %xmm2, %xmm4
222 ; SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
223 ; SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
224 ; SSE2-NEXT: punpckhbw %xmm2, %xmm1
225 ; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
226 ; SSE2-NEXT: punpcklbw %xmm2, %xmm0
227 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
228 ; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
229 ; SSE2-NEXT: packuswb %xmm4, %xmm0
; With SSSE3: one pshufb per input (zeroing the other input's lanes), then por.
232 ; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
234 ; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
235 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
236 ; SSSE3-NEXT: por %xmm1, %xmm0
238 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
239 ret <16 x i8> %shuffle
; Shuffle of %a with a zeroinitializer second operand: even lanes take bytes
; 0-7 of %a, odd lanes take indices 17, 19, ... 31, which all address zero
; bytes. On this little-endian layout that is bytes 0-7 of %a zero-extended to
; eight 16-bit elements. Both runs (ALL prefix) expect xmm1 to be zeroed and
; then interleaved into xmm0 bytewise.
242 define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
243 ; ALL-LABEL: @zext_to_v8i16_shuffle
244 ; ALL: pxor %xmm1, %xmm1
245 ; ALL-NEXT: punpcklbw %xmm1, %xmm0
246 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
247 ret <16 x i8> %shuffle
; Shuffle of %a with a zeroinitializer second operand: lanes 0, 4, 8 and 12
; take bytes 0-3 of %a and every other lane takes an index >= 16, i.e. a zero
; byte. On this little-endian layout that is bytes 0-3 of %a zero-extended to
; four 32-bit elements. Both runs (ALL prefix) expect xmm1 to be zeroed and
; then two successive bytewise interleaves with it; interleaving zero bytes
; twice yields one data byte followed by three zero bytes per group.
250 define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
251 ; ALL-LABEL: @zext_to_v4i32_shuffle
252 ; ALL: pxor %xmm1, %xmm1
253 ; ALL-NEXT: punpcklbw %xmm1, %xmm0
254 ; ALL-NEXT: punpcklbw %xmm1, %xmm0
255 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
256 ret <16 x i8> %shuffle
; Lanes 0-3 take bytes 0, 4, 8 and 12 of %a (the lowest byte of each 32-bit
; group); the remaining twelve mask entries and the second operand are undef.
;
; The FIXME-prefixed lines are not evaluated by either FileCheck invocation
; visible in this listing (FIXME is not a --check-prefix value).
; NOTE(review): they look like the desired double-pack form - confirm.
259 define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
260 ; FIXME-LABEL: @trunc_v4i32_shuffle
263 ; FIXME-NEXT: packuswb %xmm0, %xmm0
264 ; FIXME-NEXT: packuswb %xmm0, %xmm0
267 ; SSE2-LABEL: @trunc_v4i32_shuffle
270 ; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
271 ; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
272 ; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
273 ; SSE2-NEXT: packuswb %xmm0, %xmm0
; With SSSE3 the recorded output zeroes xmm1 via pshufb and ORs it into the
; shuffled xmm0.
; NOTE(review): the zero pshufb + por pair looks redundant for a single-input
; mask - presumably a known inefficiency of this lowering; confirm.
276 ; SSSE3-LABEL: @trunc_v4i32_shuffle
278 ; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
279 ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
280 ; SSSE3-NEXT: por %xmm1, %xmm0
282 %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
283 ret <16 x i8> %shuffle