1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
6 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
7 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
9 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
10 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
13 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
15 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
16 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
18 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
19 ret <16 x i16> %shuffle
22 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
23 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
25 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
26 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
27 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm0[0,1,2,3,4,4,4,4]
28 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,5,4]
29 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
32 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
34 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
35 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
36 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm0[0,1,2,3,4,4,4,4]
37 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,5,4]
38 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
40 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
41 ret <16 x i16> %shuffle
44 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
45 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
47 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
48 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
49 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
52 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
54 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
55 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
56 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
58 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
59 ret <16 x i16> %shuffle
62 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
63 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
65 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
66 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
67 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
70 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
72 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
73 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
74 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
76 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
77 ret <16 x i16> %shuffle
80 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
81 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
83 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
84 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
85 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
88 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
90 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
91 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
92 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
94 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
95 ret <16 x i16> %shuffle
98 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
99 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
101 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
102 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
103 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
106 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
108 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
109 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
110 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
112 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
113 ret <16 x i16> %shuffle
116 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
117 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
119 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
120 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
121 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
124 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
126 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
127 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
128 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
130 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
131 ret <16 x i16> %shuffle
134 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
135 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
137 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
138 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
139 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
142 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
144 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
145 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
146 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
148 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
149 ret <16 x i16> %shuffle
152 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
153 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
156 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
157 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,1,4,5,6,7]
158 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
159 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
160 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
161 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
164 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
166 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
167 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
168 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,1,4,5,6,7]
169 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
170 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
171 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
172 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
174 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
175 ret <16 x i16> %shuffle
178 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
179 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
181 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
182 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
183 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,3,0,4,5,6,7]
184 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
185 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
186 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
187 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
190 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
192 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
193 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
194 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,3,0,4,5,6,7]
195 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
196 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
197 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
198 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
200 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
201 ret <16 x i16> %shuffle
204 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
205 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
207 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
208 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
209 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
210 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,3,0,0,4,5,6,7]
211 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
212 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
213 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
214 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
217 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
219 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
220 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
221 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
222 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,3,0,0,4,5,6,7]
223 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
224 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
225 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
226 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
228 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
229 ret <16 x i16> %shuffle
232 define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
233 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
235 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
236 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
237 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
238 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,0,0,0,4,5,6,7]
239 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
240 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
241 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
242 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
245 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
247 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
248 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
249 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
250 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,0,0,0,4,5,6,7]
251 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
252 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm2[0],xmm1[0]
253 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
254 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
256 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
257 ret <16 x i16> %shuffle
260 define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
261 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
263 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
264 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
265 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
266 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,1,4,5,6,7]
267 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
268 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
269 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
270 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
273 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
275 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
276 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
277 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
278 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,1,4,5,6,7]
279 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
280 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
281 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
282 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
284 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
285 ret <16 x i16> %shuffle
288 define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
289 ; AVX1-LABEL: @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
291 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
292 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
293 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
294 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,3,0,4,5,6,7]
295 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
296 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
297 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
298 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
301 ; AVX2-LABEL: @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
303 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
304 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
305 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
306 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,3,0,4,5,6,7]
307 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
308 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
309 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
310 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
312 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
313 ret <16 x i16> %shuffle
316 define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
317 ; AVX1-LABEL: @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
319 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
320 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
321 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
322 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
323 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,3,0,0,4,5,6,7]
324 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
325 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
326 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
327 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
330 ; AVX2-LABEL: @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
332 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
333 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
334 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
335 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
336 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,3,0,0,4,5,6,7]
337 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
338 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
339 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
340 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
342 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
343 ret <16 x i16> %shuffle
346 define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
347 ; AVX1-LABEL: @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
349 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
350 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
351 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
352 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
353 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,0,0,0,4,5,6,7]
354 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
355 ; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
356 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
357 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
360 ; AVX2-LABEL: @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
362 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
363 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
364 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
365 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
366 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,0,0,0,4,5,6,7]
367 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm0[0,0,0,0,4,5,6,7]
368 ; AVX2-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
369 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
370 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
372 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
373 ret <16 x i16> %shuffle
376 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
377 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
379 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
380 ; AVX1-NEXT: vmovdqa .LCPI16_0(%rip), %xmm2
381 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
382 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
383 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
386 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
388 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
389 ; AVX2-NEXT: vmovdqa .LCPI16_0(%rip), %xmm2
390 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
391 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
392 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
394 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
395 ret <16 x i16> %shuffle
398 define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
399 ; AVX1-LABEL: @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15
401 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
402 ; AVX1-NEXT: vmovdqa .LCPI17_0(%rip), %xmm2
403 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
404 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
405 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
408 ; AVX2-LABEL: @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15
410 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
411 ; AVX2-NEXT: vmovdqa .LCPI17_0(%rip), %xmm2
412 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
413 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
414 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
416 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
417 ret <16 x i16> %shuffle
420 define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
421 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
423 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,0,0,4,5,6,7]
424 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,4,4]
425 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
426 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
427 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
428 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
431 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
433 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,0,0,4,5,6,7]
434 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,4,4]
435 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
436 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
437 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
438 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
440 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
441 ret <16 x i16> %shuffle
444 define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
445 ; AVX1-LABEL: @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15
447 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[3,3,3,3,4,5,6,7]
448 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,7,7,7]
449 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
450 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[3,3,3,3,4,5,6,7]
451 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,7,7,7]
452 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
455 ; AVX2-LABEL: @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15
457 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[3,3,3,3,4,5,6,7]
458 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,7,7,7]
459 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
460 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[3,3,3,3,4,5,6,7]
461 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,7,7,7]
462 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
464 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
465 ret <16 x i16> %shuffle
468 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
469 ; AVX1-LABEL: @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14
471 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,2,2,4,5,6,7]
472 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
473 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
474 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
475 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
476 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
479 ; AVX2-LABEL: @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14
481 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,2,2,4,5,6,7]
482 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
483 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
484 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
485 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
486 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
488 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
489 ret <16 x i16> %shuffle
492 define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
493 ; AVX1-LABEL: @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15
495 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[1,1,3,3,4,5,6,7]
496 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,5,5,7,7]
497 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
498 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[1,1,3,3,4,5,6,7]
499 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,7,7]
500 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
503 ; AVX2-LABEL: @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15
505 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[1,1,3,3,4,5,6,7]
506 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,5,5,7,7]
507 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
508 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[1,1,3,3,4,5,6,7]
509 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,7,7]
510 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
512 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
513 ret <16 x i16> %shuffle
516 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
517 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00
519 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
520 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
523 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00
525 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
526 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
528 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
529 ret <16 x i16> %shuffle
532 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
533 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00
535 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
536 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
539 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00
541 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
542 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
544 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
545 ret <16 x i16> %shuffle
548 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
549 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00
551 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
552 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
555 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00
557 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
558 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
560 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
561 ret <16 x i16> %shuffle
564 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
565 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00
567 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
568 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
571 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00
573 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
574 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
576 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
577 ret <16 x i16> %shuffle
580 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
581 ; AVX1-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00
583 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
584 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
587 ; AVX2-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00
589 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
590 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
592 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
593 ret <16 x i16> %shuffle
596 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
597 ; AVX1-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00
599 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
600 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
603 ; AVX2-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00
605 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
606 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
608 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
609 ret <16 x i16> %shuffle
612 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
613 ; AVX1-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
615 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
616 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
619 ; AVX2-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
621 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
622 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
624 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
625 ret <16 x i16> %shuffle
628 define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
629 ; AVX1-LABEL: @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15
631 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
632 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
633 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
634 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
635 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
638 ; AVX2-LABEL: @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15
640 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
641 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
642 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
643 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
644 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
646 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
647 ret <16 x i16> %shuffle
650 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
651 ; AVX1-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16
653 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1]
654 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
655 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
656 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
659 ; AVX2-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16
661 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1]
662 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
663 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
664 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
666 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
667 ret <16 x i16> %shuffle
670 define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
671 ; AVX1-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24
673 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
674 ; AVX1-NEXT: vmovdqa .LCPI31_0(%rip), %xmm3
675 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
676 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
677 ; AVX1-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
678 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
679 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
680 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
681 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
682 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
685 ; AVX2-LABEL: @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24
687 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
688 ; AVX2-NEXT: vmovdqa .LCPI31_0(%rip), %xmm3
689 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
690 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
691 ; AVX2-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
692 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
693 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
694 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
695 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
696 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
698 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
699 ret <16 x i16> %shuffle
702 define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
703 ; AVX1-LABEL: @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15
705 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
706 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
707 ; AVX1-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[0,0,0,0,4,5,6,7]
708 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
709 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
710 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
711 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
714 ; AVX2-LABEL: @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15
716 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
717 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
718 ; AVX2-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[0,0,0,0,4,5,6,7]
719 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
720 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
721 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
722 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
724 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
725 ret <16 x i16> %shuffle
728 define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
729 ; AVX1-LABEL: @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12
731 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
732 ; AVX1-NEXT: vpshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
733 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
734 ; AVX1-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[3,2,1,0,4,5,6,7]
735 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
736 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
737 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
738 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
739 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
742 ; AVX2-LABEL: @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12
744 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
745 ; AVX2-NEXT: vpshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
746 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
747 ; AVX2-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[3,2,1,0,4,5,6,7]
748 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm3[0,1],xmm2[2,3]
749 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
750 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
751 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
752 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
754 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
755 ret <16 x i16> %shuffle
758 define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
759 ; AVX1-LABEL: @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08
761 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
762 ; AVX1-NEXT: vpshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
763 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
764 ; AVX1-NEXT: vpshufd {{.*}} # xmm3 = xmm3[0,1,0,1]
765 ; AVX1-NEXT: vpshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
766 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
767 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
768 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
769 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
770 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
771 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
774 ; AVX2-LABEL: @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08
776 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
777 ; AVX2-NEXT: vpshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
778 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
779 ; AVX2-NEXT: vpshufd {{.*}} # xmm3 = xmm3[0,1,0,1]
780 ; AVX2-NEXT: vpshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
781 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm2[0,1],xmm3[2,3]
782 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
783 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
784 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
785 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
786 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
788 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
789 ret <16 x i16> %shuffle
792 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
793 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08
795 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
796 ; AVX1-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
797 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
798 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
799 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
802 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08
804 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
805 ; AVX2-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
806 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
807 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
808 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
810 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
811 ret <16 x i16> %shuffle
814 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
815 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08
817 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
818 ; AVX1-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
819 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
820 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
821 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
824 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08
826 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
827 ; AVX2-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
828 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
829 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
830 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
832 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
833 ret <16 x i16> %shuffle
836 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
837 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08
839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
840 ; AVX1-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
841 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
842 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
843 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
846 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08
848 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
849 ; AVX2-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
850 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
851 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
852 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
854 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
855 ret <16 x i16> %shuffle
858 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
859 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08
861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
862 ; AVX1-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
864 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
865 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
868 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08
870 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
871 ; AVX2-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
872 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
873 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
874 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
876 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
877 ret <16 x i16> %shuffle
880 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
881 ; AVX1-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08
883 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
884 ; AVX1-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
885 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
886 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
887 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
890 ; AVX2-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08
892 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
893 ; AVX2-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
894 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
895 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
896 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
898 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
899 ret <16 x i16> %shuffle
902 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
903 ; AVX1-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08
905 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
906 ; AVX1-NEXT: vmovdqa .LCPI40_0(%rip), %xmm2
907 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
908 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
909 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
912 ; AVX2-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08
914 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
915 ; AVX2-NEXT: vmovdqa .LCPI40_0(%rip), %xmm2
916 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
917 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
918 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
920 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
921 ret <16 x i16> %shuffle
924 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
925 ; AVX1-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08
927 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
928 ; AVX1-NEXT: vmovdqa .LCPI41_0(%rip), %xmm2
929 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
930 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
931 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
934 ; AVX2-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08
936 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
937 ; AVX2-NEXT: vmovdqa .LCPI41_0(%rip), %xmm2
938 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
939 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
940 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
942 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
943 ret <16 x i16> %shuffle
946 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
947 ; AVX1-LABEL: @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27
949 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
950 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3]
951 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
952 ; AVX1-NEXT: vpmovzxwd %xmm3, %xmm3
953 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
954 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3]
955 ; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
956 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
957 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
960 ; AVX2-LABEL: @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27
962 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
963 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3]
964 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
965 ; AVX2-NEXT: vpmovzxwd %xmm3, %xmm3
966 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
967 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3]
968 ; AVX2-NEXT: vpmovzxwd %xmm0, %xmm0
969 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
970 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
972 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
973 ret <16 x i16> %shuffle
976 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
977 ; AVX1-LABEL: @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31
979 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
980 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm2 = xmm2[4,4,5,5,6,6,7,7]
981 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
982 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm3 = xmm3[4,4,5,5,6,6,7,7]
983 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
984 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm1 = xmm1[4,4,5,5,6,6,7,7]
985 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7]
986 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
987 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
990 ; AVX2-LABEL: @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31
992 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
993 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm2 = xmm2[4,4,5,5,6,6,7,7]
994 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
995 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm3 = xmm3[4,4,5,5,6,6,7,7]
996 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
997 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm1 = xmm1[4,4,5,5,6,6,7,7]
998 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7]
999 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1000 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1002 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1003 ret <16 x i16> %shuffle
1006 define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
1007 ; AVX1-LABEL: @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31
1009 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1010 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm2 = xmm2[4,4,5,5,6,6,7,7]
1011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1012 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm3 = xmm3[4,4,5,5,6,6,7,7]
1013 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
1014 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3]
1015 ; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
1016 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1017 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1020 ; AVX2-LABEL: @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31
1022 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1023 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm2 = xmm2[4,4,5,5,6,6,7,7]
1024 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
1025 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm3 = xmm3[4,4,5,5,6,6,7,7]
1026 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
1027 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3]
1028 ; AVX2-NEXT: vpmovzxwd %xmm0, %xmm0
1029 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1030 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1032 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1033 ret <16 x i16> %shuffle
1036 define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
1037 ; AVX1-LABEL: @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27
1039 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1040 ; AVX1-NEXT: vpunpcklwd {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3]
1041 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1042 ; AVX1-NEXT: vpmovzxwd %xmm3, %xmm3
1043 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
1044 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm1 = xmm1[4,4,5,5,6,6,7,7]
1045 ; AVX1-NEXT: vpunpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7]
1046 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1047 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1050 ; AVX2-LABEL: @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27
1052 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1053 ; AVX2-NEXT: vpunpcklwd {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3]
1054 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
1055 ; AVX2-NEXT: vpmovzxwd %xmm3, %xmm3
1056 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
1057 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm1 = xmm1[4,4,5,5,6,6,7,7]
1058 ; AVX2-NEXT: vpunpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7]
1059 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1060 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1062 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
1063 ret <16 x i16> %shuffle
1066 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1067 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08
1069 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1070 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1071 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1072 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1075 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08
1077 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1078 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1079 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1080 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1082 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1083 ret <16 x i16> %shuffle
1086 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1087 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08
1089 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1090 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1091 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
1092 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1095 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08
1097 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1098 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1099 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
1100 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1102 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
1103 ret <16 x i16> %shuffle
1106 define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1107 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08
1109 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1110 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1111 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
1112 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1115 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08
1117 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1118 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1119 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
1120 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1122 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
1123 ret <16 x i16> %shuffle
1126 define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
1127 ; AVX1-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08
1129 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1130 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1131 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
1132 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1135 ; AVX2-LABEL: @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08
1137 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1138 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1139 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
1140 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1142 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
1143 ret <16 x i16> %shuffle
1146 define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
1147 ; AVX1-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08
1149 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1150 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1151 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
1152 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1155 ; AVX2-LABEL: @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08
1157 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1158 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1159 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
1160 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1162 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
1163 ret <16 x i16> %shuffle
1166 define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
1167 ; AVX1-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08
1169 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1170 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1171 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
1172 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1175 ; AVX2-LABEL: @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08
1177 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1178 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1179 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
1180 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1182 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
1183 ret <16 x i16> %shuffle
1186 define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
1187 ; AVX1-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15
1189 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1190 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1191 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
1192 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1195 ; AVX2-LABEL: @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15
1197 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1198 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1199 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
1200 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1202 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
1203 ret <16 x i16> %shuffle
1206 define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1207 ; AVX1-LABEL: @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08
1209 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,2,2,4,5,6,7]
1210 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
1211 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1212 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
1213 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1216 ; AVX2-LABEL: @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08
1218 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,0,2,2,4,5,6,7]
1219 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
1220 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1221 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
1222 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1224 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
1225 ret <16 x i16> %shuffle
1228 define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
1229 ; AVX1-LABEL: @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12
1231 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1232 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1233 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
1234 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
1235 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1238 ; AVX2-LABEL: @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12
1240 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1241 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1242 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
1243 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
1244 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1246 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
1247 ret <16 x i16> %shuffle
1250 define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
1251 ; AVX1-LABEL: @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08
1253 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
1254 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1255 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
1256 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1259 ; AVX2-LABEL: @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08
1261 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
1262 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1263 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
1264 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1266 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
1267 ret <16 x i16> %shuffle
1270 define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
1271 ; AVX1-LABEL: @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15
1273 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1274 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1275 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
1276 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1279 ; AVX2-LABEL: @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15
1281 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1282 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1283 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
1284 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1286 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
1287 ret <16 x i16> %shuffle
1290 define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
1291 ; AVX1-LABEL: @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08
1293 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,1,2,2,4,5,6,7]
1294 ; AVX1-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
1295 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1296 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
1297 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1300 ; AVX2-LABEL: @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08
1302 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm0[0,1,2,2,4,5,6,7]
1303 ; AVX2-NEXT: vpshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,4,6,6]
1304 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1305 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
1306 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1308 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
1309 ret <16 x i16> %shuffle
1312 define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
1313 ; AVX1-LABEL: @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12
1315 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[2,1,2,3]
1316 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1317 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1318 ; AVX1-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,3,4,5,6,7]
1319 ; AVX1-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
1320 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1323 ; AVX2-LABEL: @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12
1325 ; AVX2-NEXT: vpshufd {{.*}} # xmm1 = xmm0[2,1,2,3]
1326 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1327 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1328 ; AVX2-NEXT: vpshuflw {{.*}} # xmm0 = xmm0[0,0,0,3,4,5,6,7]
1329 ; AVX2-NEXT: vpshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
1330 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1332 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
1333 ret <16 x i16> %shuffle