1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4 target triple = "x86_64-unknown-unknown"
; Every one of the 32 result lanes takes byte 0 of %a; the mask never selects from %b.
; The checks expect xmm1 to be zeroed and used as the vpshufb control (splatting byte 0
; of xmm0), followed by a 128-bit insert of xmm0 into the upper half of ymm0
; (vinsertf128 under the avx run, vinserti128 under the avx2 run).
6 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
7 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
9 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
11 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
14 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
16 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
18 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
20 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21 ret <32 x i8> %shuffle
; Result lane 30 takes byte 1 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
24 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
25 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
27 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
29 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
30 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
33 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
35 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
36 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
37 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
38 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
40 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
41 ret <32 x i8> %shuffle
; Result lane 29 takes byte 2 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
44 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
45 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
47 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
48 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
49 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
50 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
53 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
55 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
56 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
57 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
58 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
60 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
61 ret <32 x i8> %shuffle
; Result lane 28 takes byte 3 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
64 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
65 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
67 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
68 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
69 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
70 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
73 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
75 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
76 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
77 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
78 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
80 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
81 ret <32 x i8> %shuffle
; Result lane 27 takes byte 4 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
84 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
85 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
87 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
88 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
89 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
90 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
93 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
95 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
96 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
97 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
98 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
100 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
101 ret <32 x i8> %shuffle
; Result lane 26 takes byte 5 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
104 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
105 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
107 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
108 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
109 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
110 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
113 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
115 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
116 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
117 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
118 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
120 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
121 ret <32 x i8> %shuffle
; Result lane 25 takes byte 6 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
124 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
125 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
127 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
128 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
129 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
130 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
133 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
135 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
136 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
137 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
138 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
140 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
141 ret <32 x i8> %shuffle
; Result lane 24 takes byte 7 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
144 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
145 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
147 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
148 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
149 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
150 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
153 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
155 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
156 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
157 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
158 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
160 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
161 ret <32 x i8> %shuffle
; Result lane 23 takes byte 8 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
164 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
165 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
167 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
168 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
169 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
170 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
173 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
175 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
176 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
177 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
178 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
180 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
181 ret <32 x i8> %shuffle
; Result lane 22 takes byte 9 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
184 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
185 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
187 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
188 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
189 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
190 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
193 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
195 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
196 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
197 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
198 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
200 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
201 ret <32 x i8> %shuffle
; Result lane 21 takes byte 10 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
204 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
205 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
207 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
208 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
209 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
210 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
213 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
215 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
216 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
217 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
218 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
220 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
221 ret <32 x i8> %shuffle
; Result lane 20 takes byte 11 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
224 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
225 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
227 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
228 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
229 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
230 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
233 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
235 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
236 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
237 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
238 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
240 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
241 ret <32 x i8> %shuffle
; Result lane 19 takes byte 12 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
244 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
245 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
247 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
248 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
249 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
250 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
253 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
255 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
256 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
257 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
258 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
260 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
261 ret <32 x i8> %shuffle
; Result lane 18 takes byte 13 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
264 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
265 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
267 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
268 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
269 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
270 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
273 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
275 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
276 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
277 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
278 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
280 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
281 ret <32 x i8> %shuffle
; Result lane 17 takes byte 14 of %a; every other lane takes byte 0 of %a (%b is never selected).
; The checks expect a zero-control vpshufb splat into xmm1 for the low 128 bits, a
; constant-control vpshufb producing the high 128 bits in xmm0, and a 128-bit insert
; (vinsertf128 / vinserti128) placing xmm0 above xmm1.
284 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
285 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
287 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
288 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
289 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
290 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
293 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
295 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
296 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
297 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
298 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
300 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
301 ret <32 x i8> %shuffle
; Result lane 16 takes byte 15 of %a; every other lane takes byte 0 of %a (%b is never selected).
; Unlike the neighbouring cases, the checks expect the vpshufb control for the high
; 128 bits to be built in a register: movl $15 into eax, then vmovd into xmm1 (byte 0
; of the control is 15, the rest are zero). The low 128 bits are a zero-control vpshufb
; splat, and the shuffled xmm1 is inserted as the upper half (vinsertf128 / vinserti128).
304 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
305 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
307 ; AVX1-NEXT: movl $15, %eax
308 ; AVX1-NEXT: vmovd %eax, %xmm1
309 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
310 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
311 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
315 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
317 ; AVX2-NEXT: movl $15, %eax
318 ; AVX2-NEXT: vmovd %eax, %xmm1
319 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
320 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
321 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
322 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
324 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
325 ret <32 x i8> %shuffle
; Result lane 15 takes byte 16 of %a (byte 0 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
328 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
329 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
331 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
332 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
333 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
334 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
335 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero
336 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
337 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
340 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
342 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
343 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
344 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
345 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
346 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero
347 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
348 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
350 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
351 ret <32 x i8> %shuffle
; Result lane 14 takes byte 17 of %a (byte 1 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
354 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
355 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
357 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
358 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
359 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
360 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero
361 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0]
362 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
363 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
366 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
368 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
369 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
370 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
371 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero
372 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0]
373 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
374 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
376 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
377 ret <32 x i8> %shuffle
; Result lane 13 takes byte 18 of %a (byte 2 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
380 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
381 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
383 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
384 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
385 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
386 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[2],zero,zero
387 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0]
388 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
389 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
392 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
394 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
395 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
396 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
397 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[2],zero,zero
398 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0]
399 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
400 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
402 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
403 ret <32 x i8> %shuffle
; Result lane 12 takes byte 19 of %a (byte 3 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
406 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
407 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
409 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
410 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
411 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
412 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[3],zero,zero,zero
413 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0]
414 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
415 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
418 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
420 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
421 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
422 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
423 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[3],zero,zero,zero
424 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0]
425 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
426 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
428 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
429 ret <32 x i8> %shuffle
; Result lane 11 takes byte 20 of %a (byte 4 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
432 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
433 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
435 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
436 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
437 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
438 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[4],zero,zero,zero,zero
439 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0]
440 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
441 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
444 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
446 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
447 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
448 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
449 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[4],zero,zero,zero,zero
450 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0]
451 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
452 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
454 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
455 ret <32 x i8> %shuffle
; Result lane 10 takes byte 21 of %a (byte 5 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
458 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
459 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
461 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
462 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
463 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
464 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero
465 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0]
466 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
467 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
470 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
472 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
473 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
474 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
475 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero
476 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0]
477 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
478 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
480 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
481 ret <32 x i8> %shuffle
; Result lane 9 takes byte 22 of %a (byte 6 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
484 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
485 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
487 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
488 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
489 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
490 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[6],zero,zero,zero,zero,zero,zero
491 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0]
492 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
493 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
496 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
498 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
499 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
500 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
501 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[6],zero,zero,zero,zero,zero,zero
502 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0]
503 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
504 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
506 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
507 ret <32 x i8> %shuffle
; Result lane 8 takes byte 23 of %a (byte 7 of its upper 128 bits); every other lane
; takes byte 0 of %a (%b is never selected).
; The checks expect the upper half of ymm0 to be extracted into xmm2 (vextractf128 /
; vextracti128), two vpshufb results with complementary zeroed lanes to be merged by
; vpor into the low 128 bits, and the byte-0 splat in xmm1 to be inserted as the
; upper 128 bits (vinsertf128 / vinserti128).
510 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
511 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
513 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
514 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
515 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
516 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[7],zero,zero,zero,zero,zero,zero,zero
517 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0]
518 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
519 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
522 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
524 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
525 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
526 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
527 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[7],zero,zero,zero,zero,zero,zero,zero
528 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0]
529 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
530 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
532 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
533 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 7
; is element 24, i.e. byte 8 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 8 in
; slot 7 and one filling every other slot with byte 0.
536 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
537 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
539 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
540 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
541 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
542 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,xmm2[8],zero,zero,zero,zero,zero,zero,zero,zero
543 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0]
544 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
545 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
548 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
550 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
551 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
552 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
553 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,xmm2[8],zero,zero,zero,zero,zero,zero,zero,zero
554 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0]
555 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
556 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
558 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
559 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 6
; is element 25, i.e. byte 9 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 9 in
; slot 6 and one filling every other slot with byte 0.
562 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
563 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
565 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
566 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
567 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
568 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,xmm2[9],zero,zero,zero,zero,zero,zero,zero,zero,zero
569 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0]
570 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
571 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
574 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
576 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
577 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
578 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
579 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,xmm2[9],zero,zero,zero,zero,zero,zero,zero,zero,zero
580 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0]
581 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
582 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
584 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
585 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 5
; is element 26, i.e. byte 10 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 10 in
; slot 5 and one filling every other slot with byte 0.
588 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
589 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
591 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
592 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
593 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
594 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,xmm2[10],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
595 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0]
596 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
597 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
600 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
602 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
603 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
604 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
605 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,xmm2[10],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
606 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0]
607 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
608 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
610 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
611 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 4
; is element 27, i.e. byte 11 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 11 in
; slot 4 and one filling every other slot with byte 0.
614 define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
615 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
617 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
618 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
619 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
620 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
621 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0]
622 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
623 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
626 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
628 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
629 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
630 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
631 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
632 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0]
633 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
634 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
636 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
637 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 3
; is element 28, i.e. byte 12 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 12 in
; slot 3 and one filling every other slot with byte 0.
640 define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
641 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
643 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
644 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
645 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
646 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,xmm2[12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
647 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
648 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
649 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
652 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
654 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
655 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
656 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
657 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,xmm2[12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
658 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
659 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
660 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
662 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
663 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 2
; is element 29, i.e. byte 13 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 13 in
; slot 2 and one filling every other slot with byte 0.
666 define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
667 ; AVX1-LABEL: @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
669 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
670 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
671 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
672 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,xmm2[13],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
673 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
674 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
675 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
678 ; AVX2-LABEL: @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
680 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
681 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
682 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
683 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,xmm2[13],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
684 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
685 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
686 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
688 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
689 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 1
; is element 30, i.e. byte 14 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half; the lower
; half is the vpor of two vpshufb results, one placing upper-half byte 14 in
; slot 1 and one filling every other slot with byte 0.
692 define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
693 ; AVX1-LABEL: @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
695 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
696 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
697 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
698 ; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,xmm2[14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
699 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
700 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
701 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
704 ; AVX2-LABEL: @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
706 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
707 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
708 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
709 ; AVX2-NEXT: vpshufb {{.*}} # xmm2 = zero,xmm2[14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
710 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
711 ; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
712 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
714 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
715 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): result byte 0
; is element 31, i.e. byte 15 of the upper 128-bit half; every other result
; byte is element 0.
; Expected lowering (identical for both prefixes apart from the f128/i128
; mnemonics): xmm1 = byte 0 splat, used as the upper result half. For the
; lower half, the byte-0 fill uses a vpshufb control built in a GPR
; (movl $128 then vmovd) instead of a memory operand, and is OR-ed with
; upper-half byte 15 moved into slot 0.
718 define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
719 ; AVX1-LABEL: @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
721 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
722 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
723 ; AVX1-NEXT: movl $128, %eax
724 ; AVX1-NEXT: vmovd %eax, %xmm2
725 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm2
726 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
727 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
728 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
729 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
732 ; AVX2-LABEL: @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
734 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
735 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
736 ; AVX2-NEXT: movl $128, %eax
737 ; AVX2-NEXT: vmovd %eax, %xmm2
738 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm2
739 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
740 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
741 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
742 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
744 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
745 ret <32 x i8> %shuffle
; Per-half splat of %a (%b is not referenced by the mask): result bytes 0-15
; are element 0 and result bytes 16-31 are element 16, so each 128-bit half
; repeats its own first byte.
; Expected lowering: extract the upper half, apply vpshufb with an all-zero
; control (produced by vpxor) to each half, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
748 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
749 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
751 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
752 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
753 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
754 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
755 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
758 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
760 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
761 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
762 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
763 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
764 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
766 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
767 ret <32 x i8> %shuffle
; Per-half splat of %a (%b is not referenced by the mask): result bytes 0-15
; are element 15 and result bytes 16-31 are element 31, so each 128-bit half
; repeats its own last byte.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI33_0 label is matched literally and its index appears
; to follow this function's position in the file -- adding or removing earlier
; tests would likely require renumbering it; confirm before reordering.
770 define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
771 ; AVX1-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
773 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
774 ; AVX1-NEXT: vmovdqa .LCPI33_0(%rip), %xmm2
775 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
776 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
777 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
780 ; AVX2-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
782 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
783 ; AVX2-NEXT: vmovdqa .LCPI33_0(%rip), %xmm2
784 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
785 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
786 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
788 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
789 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every group
; of 8 result bytes repeats the first byte of the matching 8-byte group of %a
; (elements 0, 8, 16, 24). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI34_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
792 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
793 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
795 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
796 ; AVX1-NEXT: vmovdqa .LCPI34_0(%rip), %xmm2
797 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
798 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
799 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
802 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
804 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
805 ; AVX2-NEXT: vmovdqa .LCPI34_0(%rip), %xmm2
806 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
807 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
808 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
810 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
811 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every group
; of 8 result bytes repeats the last byte of the matching 8-byte group of %a
; (elements 7, 15, 23, 31). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI35_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
814 define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
815 ; AVX1-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
817 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
818 ; AVX1-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
819 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
820 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
821 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
824 ; AVX2-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
826 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
827 ; AVX2-NEXT: vmovdqa .LCPI35_0(%rip), %xmm2
828 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
829 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
830 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
832 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
833 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every group
; of 4 result bytes repeats the first byte of the matching 4-byte group of %a
; (elements 0, 4, 8, ..., 28). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI36_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
836 define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) {
837 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
839 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
840 ; AVX1-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
841 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
842 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
843 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
846 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
848 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
849 ; AVX2-NEXT: vmovdqa .LCPI36_0(%rip), %xmm2
850 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
851 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
852 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
854 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
855 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every group
; of 4 result bytes repeats the last byte of the matching 4-byte group of %a
; (elements 3, 7, 11, ..., 31). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI37_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
858 define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
859 ; AVX1-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
862 ; AVX1-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
864 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
865 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
868 ; AVX2-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
870 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
871 ; AVX2-NEXT: vmovdqa .LCPI37_0(%rip), %xmm2
872 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
873 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
874 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
876 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31>
877 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every pair of
; result bytes repeats the even-indexed byte of the matching byte pair of %a
; (elements 0, 2, 4, ..., 30). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI38_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
880 define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
881 ; AVX1-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
883 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
884 ; AVX1-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
885 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
886 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
887 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
890 ; AVX2-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
892 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
893 ; AVX2-NEXT: vmovdqa .LCPI38_0(%rip), %xmm2
894 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
895 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
896 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
898 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
899 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): every pair of
; result bytes repeats the odd-indexed byte of the matching byte pair of %a
; (elements 1, 3, 5, ..., 31). The pattern is the same in both 128-bit halves.
; Expected lowering: extract the upper half, load one vpshufb control from the
; constant pool, apply it to both halves, then reassemble with a 128-bit
; insert. The two prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI39_0 label is matched literally and its index appears
; to follow this function's position in the file -- confirm before reordering
; tests.
902 define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) {
903 ; AVX1-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
905 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
906 ; AVX1-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
907 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
908 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
909 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
912 ; AVX2-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
914 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
915 ; AVX2-NEXT: vmovdqa .LCPI39_0(%rip), %xmm2
916 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
917 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
918 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
920 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
921 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 14 of each half is element 1, every other slot is element 0.
; Expected lowering: one vpshufb on the lower half, whose result is then
; inserted into the upper half as well (no upper-half extract is expected).
924 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
925 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
927 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
928 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
931 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
933 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
934 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
936 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
937 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 13 of each half is element 2, every other slot is element 0.
; Expected lowering: one vpshufb on the lower half, whose result is then
; inserted into the upper half as well (no upper-half extract is expected).
940 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
941 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
943 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
944 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
947 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
949 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
950 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
952 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
953 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 8 of each half is element 7, every other slot is element 0.
; Expected lowering: one vpshufb on the lower half, whose result is then
; inserted into the upper half as well (no upper-half extract is expected).
956 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
957 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
959 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
960 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
963 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
965 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
966 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
968 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
969 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 7 of each half is element 8, every other slot is element 0.
; Expected lowering: one vpshufb on the lower half, whose result is then
; inserted into the upper half as well (no upper-half extract is expected).
972 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
973 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
975 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
976 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
979 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
981 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
982 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
984 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
985 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 1 of each half is element 14, every other slot is element 0.
; Expected lowering: one vpshufb on the lower half, whose result is then
; inserted into the upper half as well (no upper-half extract is expected).
988 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
989 ; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
991 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
992 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
995 ; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
997 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
998 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1000 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1001 ret <32 x i8> %shuffle
; Single-input shuffle of %a (%b is not referenced by the mask): both 16-byte
; halves of the result use the same pattern drawn only from elements 0-15 --
; slot 0 of each half is element 15, every other slot is element 0.
; Expected lowering: the vpshufb control is built in a GPR (movl $15 then
; vmovd) instead of a memory operand; one vpshufb on the lower half follows,
; and its result is inserted into the upper half as well.
1004 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1005 ; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
1007 ; AVX1-NEXT: movl $15, %eax
1008 ; AVX1-NEXT: vmovd %eax, %xmm1
1009 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1010 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1013 ; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
1015 ; AVX2-NEXT: movl $15, %eax
1016 ; AVX2-NEXT: vmovd %eax, %xmm1
1017 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1018 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1020 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1021 ret <32 x i8> %shuffle
; Two-input shuffle: mask indices 32-63 select from %b, 0-31 from %a. Each
; 16-bit result word holds two copies of the byte at that word's even byte
; position, taken from %b for even-numbered words (32, 36, 40, ...) and from
; %a for odd-numbered words (2, 6, 10, ...).
; Expected lowering, per 128-bit half: vpshufb each input with its own
; constant-pool control, merge the two with a word-granular vpblendw that
; alternates sources, then join the halves with a 128-bit insert. The two
; prefixes differ only in the f128/i128 mnemonics.
; NOTE(review): the .LCPI46_0/.LCPI46_1 labels are matched literally and their
; index appears to follow this function's position in the file -- confirm
; before reordering tests.
1024 define <32 x i8> @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_48_48_18_18_52_52_22_22_56_56_26_26_60_60_30_30(<32 x i8> %a, <32 x i8> %b) {
1025 ; AVX1-LABEL: @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_48_48_18_18_52_52_22_22_56_56_26_26_60_60_30_30
1027 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1028 ; AVX1-NEXT: vmovdqa .LCPI46_0(%rip), %xmm3
1029 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1030 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1031 ; AVX1-NEXT: vmovdqa .LCPI46_1(%rip), %xmm5
1032 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1033 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
1034 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1035 ; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1036 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
1037 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1040 ; AVX2-LABEL: @shuffle_v32i8_32_32_02_02_36_36_06_06_40_40_10_10_44_44_14_14_48_48_18_18_52_52_22_22_56_56_26_26_60_60_30_30
1042 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1043 ; AVX2-NEXT: vmovdqa .LCPI46_0(%rip), %xmm3
1044 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1045 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
1046 ; AVX2-NEXT: vmovdqa .LCPI46_1(%rip), %xmm5
1047 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1048 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
1049 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1050 ; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1051 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
1052 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1054 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 2, i32 2, i32 36, i32 36, i32 6, i32 6, i32 40, i32 40, i32 10, i32 10, i32 44, i32 44, i32 14, i32 14, i32 48, i32 48, i32 18, i32 18, i32 52, i32 52, i32 22, i32 22, i32 56, i32 56, i32 26, i32 26, i32 60, i32 60, i32 30, i32 30>
1055 ret <32 x i8> %shuffle
; Two-input shuffle: mask indices 32-63 select from %b, 0-31 from %a. The
; 4-byte pattern <a[0], a[0], b[0], b[0]> is repeated across the whole result,
; so both 128-bit halves are identical and only the lower halves of the inputs
; are read.
; Expected lowering: build one 128-bit value (vpshufb on the %b register,
; vpunpcklbw + vpshufd on the %a register, word-granular vpblendw to
; interleave them) and insert it into the upper half too. The two prefixes
; differ only in the f128/i128 insert mnemonic.
1058 define <32 x i8> @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32(<32 x i8> %a, <32 x i8> %b) {
1059 ; AVX1-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32
1061 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,0,0,0,2,2,0,0,0,0,0,0,6,6,0,0]
1062 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1063 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
1064 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1065 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1068 ; AVX2-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32
1070 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,0,0,0,2,2,0,0,0,0,0,0,6,6,0,0]
1071 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1072 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
1073 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1074 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1076 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32>
1077 ret <32 x i8> %shuffle
; Two-input shuffle with the same pattern in each 16-byte half of the result:
; the low half repeats a[0], a[0], b[0], b[0]; the high half repeats
; a[16], a[16], b[16], b[16] (mask indices 32 and 48 are elements 0 and 16 of %b).
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1080 define <32 x i8> @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_16_16_48_48_16_16_48_48_16_16_48_48_16_16_48_48(<32 x i8> %a, <32 x i8> %b) {
1081 ; AVX1-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_16_16_48_48_16_16_48_48_16_16_48_48_16_16_48_48
1083 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1084 ; AVX1-NEXT: vmovdqa .LCPI48_0(%rip), %xmm3
1085 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1086 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1087 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1088 ; AVX1-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
1089 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
1090 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1091 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1092 ; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
1093 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1094 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1097 ; AVX2-LABEL: @shuffle_v32i8_00_00_32_32_00_00_32_32_00_00_32_32_00_00_32_32_16_16_48_48_16_16_48_48_16_16_48_48_16_16_48_48
1099 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
1100 ; AVX2-NEXT: vmovdqa .LCPI48_0(%rip), %xmm3
1101 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1102 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
1103 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1104 ; AVX2-NEXT: vpshufd {{.*}} # xmm4 = xmm4[0,0,0,0]
1105 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
1106 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1107 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1108 ; AVX2-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,0,0,0]
1109 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1110 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1112 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 0, i32 0, i32 32, i32 32, i32 16, i32 16, i32 48, i32 48, i32 16, i32 16, i32 48, i32 48, i32 16, i32 16, i32 48, i32 48, i32 16, i32 16, i32 48, i32 48>
1113 ret <32 x i8> %shuffle
; Two-input shuffle, same pattern in each 16-byte half of the result:
; the first 8 bytes of the half broadcast the first byte of %b's matching half
; (mask index 32 / 48); the last 8 bytes are the even bytes 8,10,12,14 of %a's
; matching half (24,26,28,30 for the high half), each duplicated.
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1116 define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_48_48_48_48_48_48_48_48_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
1117 ; AVX1-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_48_48_48_48_48_48_48_48_24_24_26_26_28_28_30_30
1119 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1120 ; AVX1-NEXT: vmovdqa .LCPI49_0(%rip), %xmm3
1121 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1122 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1123 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1124 ; AVX1-NEXT: vpshuflw {{.*}} # xmm4 = xmm4[0,0,0,0,4,5,6,7]
1125 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1126 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1127 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1128 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1129 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1130 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1133 ; AVX2-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_08_10_10_12_12_14_14_48_48_48_48_48_48_48_48_24_24_26_26_28_28_30_30
1135 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1136 ; AVX2-NEXT: vmovdqa .LCPI49_0(%rip), %xmm3
1137 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1138 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
1139 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1140 ; AVX2-NEXT: vpshuflw {{.*}} # xmm4 = xmm4[0,0,0,0,4,5,6,7]
1141 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
1142 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1143 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1144 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1145 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
1146 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1148 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
1149 ret <32 x i8> %shuffle
; Two-input shuffle, same pattern in each 16-byte half of the result:
; the first 8 bytes of the half are %b's bytes 6,4,2,0 of the matching half
; (mask indices 38,36,34,32 / 54,52,50,48), each duplicated; the last 8 bytes are
; %a's bytes 14,12,10,8 of the matching half (30,28,26,24 high), each duplicated.
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1152 define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_54_54_52_52_50_50_48_48_30_30_28_28_26_26_24_24(<32 x i8> %a, <32 x i8> %b) {
1153 ; AVX1-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_54_54_52_52_50_50_48_48_30_30_28_28_26_26_24_24
1155 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1156 ; AVX1-NEXT: vmovdqa .LCPI50_0(%rip), %xmm3
1157 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1158 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1159 ; AVX1-NEXT: vmovdqa .LCPI50_1(%rip), %xmm5
1160 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1161 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1162 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1163 ; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1164 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1165 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1168 ; AVX2-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_14_14_12_12_10_10_08_08_54_54_52_52_50_50_48_48_30_30_28_28_26_26_24_24
1170 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1171 ; AVX2-NEXT: vmovdqa .LCPI50_0(%rip), %xmm3
1172 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1173 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
1174 ; AVX2-NEXT: vmovdqa .LCPI50_1(%rip), %xmm5
1175 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1176 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
1177 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1178 ; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1179 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
1180 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1182 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 38, i32 38, i32 36, i32 36, i32 34, i32 34, i32 32, i32 32, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8, i32 54, i32 54, i32 52, i32 52, i32 50, i32 50, i32 48, i32 48, i32 30, i32 30, i32 28, i32 28, i32 26, i32 26, i32 24, i32 24>
1183 ret <32 x i8> %shuffle
; Two-input shuffle, same pattern in each 16-byte half of the result:
; the first 8 bytes of the half are %b's bytes 6,4,2,0 of the matching half
; (mask indices 38,36,34,32 / 54,52,50,48), each duplicated; the last 8 bytes are
; %a's bytes 6,4,2,0 of the matching half (22,20,18,16 high), each duplicated.
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1186 define <32 x i8> @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_54_54_52_52_50_50_48_48_22_22_20_20_18_18_16_16(<32 x i8> %a, <32 x i8> %b) {
1187 ; AVX1-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_54_54_52_52_50_50_48_48_22_22_20_20_18_18_16_16
1189 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1190 ; AVX1-NEXT: vmovdqa .LCPI51_0(%rip), %xmm3
1191 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1192 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1193 ; AVX1-NEXT: vmovdqa .LCPI51_1(%rip), %xmm5
1194 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1195 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1196 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1197 ; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1198 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1199 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1202 ; AVX2-LABEL: @shuffle_v32i8_38_38_36_36_34_34_32_32_06_06_04_04_02_02_00_00_54_54_52_52_50_50_48_48_22_22_20_20_18_18_16_16
1204 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1205 ; AVX2-NEXT: vmovdqa .LCPI51_0(%rip), %xmm3
1206 ; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1207 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
1208 ; AVX2-NEXT: vmovdqa .LCPI51_1(%rip), %xmm5
1209 ; AVX2-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1210 ; AVX2-NEXT: vpblendd {{.*}} # xmm2 = xmm4[0,1],xmm2[2,3]
1211 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1212 ; AVX2-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1213 ; AVX2-NEXT: vpblendd {{.*}} # xmm0 = xmm1[0,1],xmm0[2,3]
1214 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1216 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 38, i32 38, i32 36, i32 36, i32 34, i32 34, i32 32, i32 32, i32 6, i32 6, i32 4, i32 4, i32 2, i32 2, i32 0, i32 0, i32 54, i32 54, i32 52, i32 52, i32 50, i32 50, i32 48, i32 48, i32 22, i32 22, i32 20, i32 20, i32 18, i32 18, i32 16, i32 16>
1217 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: broadcast the first byte of the half (0 / 16), except position
; 14 of the half, which takes the half's byte 1 (mask index 1 / 17).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; both
; halves are expected to share one vpshufb control register (%xmm2).
1220 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) {
1221 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
1223 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1224 ; AVX1-NEXT: vmovdqa .LCPI52_0(%rip), %xmm2
1225 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1226 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1227 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1230 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
1232 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1233 ; AVX2-NEXT: vmovdqa .LCPI52_0(%rip), %xmm2
1234 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1235 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1236 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1238 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16>
1239 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: broadcast the first byte of the half (0 / 16), except position
; 13 of the half, which takes the half's byte 2 (mask index 2 / 18).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; both
; halves are expected to share one vpshufb control register (%xmm2).
1242 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> %a, <32 x i8> %b) {
1243 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
1245 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1246 ; AVX1-NEXT: vmovdqa .LCPI53_0(%rip), %xmm2
1247 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1248 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1249 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1252 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
1254 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1255 ; AVX2-NEXT: vmovdqa .LCPI53_0(%rip), %xmm2
1256 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1257 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1258 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1260 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16>
1261 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: broadcast the first byte of the half (0 / 16), except position
; 8 of the half, which takes the half's byte 7 (mask index 7 / 23).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; both
; halves are expected to share one vpshufb control register (%xmm2).
1264 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1265 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
1267 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1268 ; AVX1-NEXT: vmovdqa .LCPI54_0(%rip), %xmm2
1269 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1270 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1271 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1274 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
1276 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1277 ; AVX2-NEXT: vmovdqa .LCPI54_0(%rip), %xmm2
1278 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1279 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1280 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1282 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1283 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: broadcast the first byte of the half (0 / 16), except position
; 7 of the half, which takes the half's byte 8 (mask index 8 / 24).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; both
; halves are expected to share one vpshufb control register (%xmm2).
1286 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1287 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
1289 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1290 ; AVX1-NEXT: vmovdqa .LCPI55_0(%rip), %xmm2
1291 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1292 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1293 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1296 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
1298 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1299 ; AVX2-NEXT: vmovdqa .LCPI55_0(%rip), %xmm2
1300 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1301 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1302 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1304 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1305 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: broadcast the first byte of the half (0 / 16), except position
; 1 of the half, which takes the half's byte 14 (mask index 14 / 30).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; both
; halves are expected to share one vpshufb control register (%xmm2).
1308 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1309 ; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1311 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1312 ; AVX1-NEXT: vmovdqa .LCPI56_0(%rip), %xmm2
1313 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1314 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1315 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1318 ; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1320 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1321 ; AVX2-NEXT: vmovdqa .LCPI56_0(%rip), %xmm2
1322 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1323 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1324 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1326 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1327 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a), same pattern in each
; 16-byte half: position 0 of the half takes the half's last byte (mask index
; 15 / 31) and every other position takes the half's first byte (0 / 16).
; The check lines pin the expected AVX1 and AVX2 instruction sequences; here
; the vpshufb control is expected to be built with movl $15 + vmovd rather
; than loaded from a constant pool.
1330 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1331 ; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1333 ; AVX1-NEXT: movl $15, %eax
1334 ; AVX1-NEXT: vmovd %eax, %xmm1
1335 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1336 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1337 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1338 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1341 ; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1343 ; AVX2-NEXT: movl $15, %eax
1344 ; AVX2-NEXT: vmovd %eax, %xmm1
1345 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1346 ; AVX2-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1347 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1348 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1350 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1351 ret <32 x i8> %shuffle
; Two-input shuffle, same pattern in each 16-byte half of the result: for each
; even offset k in 0,2,4,6 of the half, emit a[k], a[k], b[k], b[k] (mask
; indices 0..6 with 32..38 for the low half, 16..22 with 48..54 for the high).
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1354 define <32 x i8> @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54(<32 x i8> %a, <32 x i8> %b) {
1355 ; AVX1-LABEL: @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54
1357 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1358 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1359 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1360 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1361 ; AVX1-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1362 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1363 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1364 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1365 ; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1366 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1367 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1370 ; AVX2-LABEL: @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54
1372 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1373 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1374 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
1375 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1376 ; AVX2-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1377 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1378 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1379 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1380 ; AVX2-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1381 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1382 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1384 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 32, i32 32, i32 2, i32 2, i32 34, i32 34, i32 4, i32 4, i32 36, i32 36, i32 6, i32 6, i32 38, i32 38, i32 16, i32 16, i32 48, i32 48, i32 18, i32 18, i32 50, i32 50, i32 20, i32 20, i32 52, i32 52, i32 22, i32 22, i32 54, i32 54>
1385 ret <32 x i8> %shuffle
; Two-input shuffle, same pattern in each 16-byte half of the result: for each
; even offset k in 8,10,12,14 of the half, emit a[k], a[k], b[k], b[k] (mask
; indices 8..14 with 40..46 for the low half, 24..30 with 56..62 for the high).
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1388 define <32 x i8> @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62(<32 x i8> %a, <32 x i8> %b) {
1389 ; AVX1-LABEL: @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62
1391 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1392 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1393 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1394 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1395 ; AVX1-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1396 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1397 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1398 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1399 ; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1400 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1401 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1404 ; AVX2-LABEL: @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62
1406 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1407 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1408 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
1409 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1410 ; AVX2-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1411 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1412 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1413 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1414 ; AVX2-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1415 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1416 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1418 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 40, i32 40, i32 10, i32 10, i32 42, i32 42, i32 12, i32 12, i32 44, i32 44, i32 14, i32 14, i32 46, i32 46, i32 24, i32 24, i32 56, i32 56, i32 26, i32 26, i32 58, i32 58, i32 28, i32 28, i32 60, i32 60, i32 30, i32 30, i32 62, i32 62>
1419 ret <32 x i8> %shuffle
; Two-input shuffle whose two 16-byte halves use different in-half offsets.
; Low half: for k in 0,2,4,6 emit a[k], a[k], b[k], b[k] (mask 0..6, 32..38).
; High half: for k in 24,26,28,30 emit a[k], a[k], b[k], b[k] (mask 24..30,
; 56..62), i.e. offsets 8,10,12,14 within the upper half of each input.
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1422 define <32 x i8> @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62(<32 x i8> %a, <32 x i8> %b) {
1423 ; AVX1-LABEL: @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62
1425 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1426 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1427 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1428 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1429 ; AVX1-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1430 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1431 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1432 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1433 ; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1434 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1435 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1438 ; AVX2-LABEL: @shuffle_v32i8_00_00_32_32_02_02_34_34_04_04_36_36_06_06_38_38_24_24_56_56_26_26_58_58_28_28_60_60_30_30_62_62
1440 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1441 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1442 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
1443 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1444 ; AVX2-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1445 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1446 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1447 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1448 ; AVX2-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1449 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1450 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1452 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 32, i32 32, i32 2, i32 2, i32 34, i32 34, i32 4, i32 4, i32 36, i32 36, i32 6, i32 6, i32 38, i32 38, i32 24, i32 24, i32 56, i32 56, i32 26, i32 26, i32 58, i32 58, i32 28, i32 28, i32 60, i32 60, i32 30, i32 30, i32 62, i32 62>
1453 ret <32 x i8> %shuffle
; Two-input shuffle whose two 16-byte halves use different in-half offsets.
; Low half: for k in 8,10,12,14 emit a[k], a[k], b[k], b[k] (mask 8..14, 40..46).
; High half: for k in 16,18,20,22 emit a[k], a[k], b[k], b[k] (mask 16..22,
; 48..54), i.e. offsets 0,2,4,6 within the upper half of each input.
; The check lines pin the expected AVX1 and AVX2 instruction sequences.
1456 define <32 x i8> @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54(<32 x i8> %a, <32 x i8> %b) {
1457 ; AVX1-LABEL: @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54
1459 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1460 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1461 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1462 ; AVX1-NEXT: vpunpcklbw {{.*}} # xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1463 ; AVX1-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1464 ; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1465 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1466 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1467 ; AVX1-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1468 ; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1469 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1472 ; AVX2-LABEL: @shuffle_v32i8_08_08_40_40_10_10_42_42_12_12_44_44_14_14_46_46_16_16_48_48_18_18_50_50_20_20_52_52_22_22_54_54
1474 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
1475 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1476 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
1477 ; AVX2-NEXT: vpunpcklbw {{.*}} # xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1478 ; AVX2-NEXT: vpalignr {{.*}} # xmm3 = xmm3[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1479 ; AVX2-NEXT: vpblendw {{.*}} # xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
1480 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1481 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1482 ; AVX2-NEXT: vpalignr {{.*}} # xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1483 ; AVX2-NEXT: vpblendw {{.*}} # xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1484 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1486 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 40, i32 40, i32 10, i32 10, i32 42, i32 42, i32 12, i32 12, i32 44, i32 44, i32 14, i32 14, i32 46, i32 46, i32 16, i32 16, i32 48, i32 48, i32 18, i32 18, i32 50, i32 50, i32 20, i32 20, i32 52, i32 52, i32 22, i32 22, i32 54, i32 54>
1487 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a) whose two 16-byte halves
; use different in-half patterns. Low half: broadcast byte 0, except position
; 14, which takes byte 1. High half: broadcast byte 16, except position 1 of
; the half, which takes byte 17.
; The check lines pin the expected AVX1 and AVX2 instruction sequences, with a
; separate vpshufb pattern per half.
1490 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1491 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1493 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1494 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1495 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1496 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1499 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1501 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1502 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1503 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1504 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1506 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1507 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a) whose two 16-byte halves
; use different in-half patterns. Low half: broadcast byte 0, except position
; 13, which takes byte 2. High half: broadcast byte 16, except position 2 of
; the half, which takes byte 18.
; The check lines pin the expected AVX1 and AVX2 instruction sequences, with a
; separate vpshufb pattern per half.
1510 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1511 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16
1513 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1514 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1515 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
1516 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1519 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16
1521 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1522 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1523 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
1524 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1526 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1527 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a) whose two 16-byte halves
; use different in-half patterns. Low half: broadcast byte 0, except position
; 8, which takes byte 7. High half: broadcast byte 16, except position 7 of
; the half, which takes byte 23.
; The check lines pin the expected AVX1 and AVX2 instruction sequences, with a
; separate vpshufb pattern per half.
1530 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1531 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16
1533 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
1534 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1535 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
1536 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1539 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16
1541 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
1542 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1543 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
1544 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1546 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1547 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a) whose two 16-byte halves
; use different in-half patterns. Low half: broadcast byte 0, except position
; 7, which takes byte 8. High half: broadcast byte 16, except position 8 of
; the half, which takes byte 24.
; The check lines pin the expected AVX1 and AVX2 instruction sequences, with a
; separate vpshufb pattern per half.
1550 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1551 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16
1553 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
1554 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1555 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
1556 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1559 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16
1561 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
1562 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1563 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
1564 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1566 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1567 ret <32 x i8> %shuffle
; Single-input shuffle (the mask only references %a) whose two 16-byte halves
; use different in-half patterns. Low half: broadcast byte 0, except position
; 1, which takes byte 14. High half: broadcast byte 16, except position 14 of
; the half, which takes byte 30.
; The check lines pin the expected AVX1 and AVX2 instruction sequences, with a
; separate vpshufb pattern per half.
1570 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
1571 ; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16
1573 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1574 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1575 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
1576 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1579 ; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16
1581 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1582 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1583 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
1584 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1586 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
1587 ret <32 x i8> %shuffle
; Byte shuffle of %a confined to each 128-bit half: result lane 0 reads byte
; 15 and result lane 31 reads byte 31; all remaining lanes repeat byte 0 (low
; half) or byte 16 (high half). Every mask index is below 32, so %b is never
; selected.
; Unlike the neighbouring tests, the expected low-half shuffle uses a
; register-operand vpshufb whose control vector is built with movl $15 /
; vmovd instead of matching a decoded pattern comment. The high half is still
; checked through the decoded pattern.
1590 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) {
1591 ; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31
1593 ; AVX1-NEXT: movl $15, %eax
1594 ; AVX1-NEXT: vmovd %eax, %xmm1
1595 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1596 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1597 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
1598 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1601 ; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31
1603 ; AVX2-NEXT: movl $15, %eax
1604 ; AVX2-NEXT: vmovd %eax, %xmm1
1605 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1606 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1607 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
1608 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; Mask index 31 shows up as 15 in the high-half pattern above because that
; pattern is relative to the extracted upper 128 bits.
1610 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31>
1611 ret <32 x i8> %shuffle
; Byte shuffle of %a in groups of four identical lanes. The low half repeats
; bytes 0, 4, 8, 12 in ascending order; the high half repeats bytes 28, 24,
; 20, 16, i.e. the same group pattern in descending order. All indices stay
; inside their own 128-bit half and below 32, so %b is never selected.
; The expected code shuffles each half with its own vpshufb (the high half
; after a 128-bit extract) and joins the results with a 128-bit insert.
1614 define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1615 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16
1617 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1618 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1619 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
1620 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1623 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16
1625 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1626 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1627 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
1628 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; High-half mask indices 28/24/20/16 correspond to 12/8/4/0 in the pattern
; above, which is relative to the extracted upper 128 bits.
1630 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
1631 ret <32 x i8> %shuffle
; Byte shuffle of %a in runs of eight identical lanes: the low half is eight
; copies of byte 8 followed by eight copies of byte 0, and the high half is
; eight copies of byte 16 followed by eight copies of byte 24. Indices stay
; inside their own 128-bit half and below 32, so %b is never selected.
; The expected code uses one vpshufb per half (the high half after a 128-bit
; extract) followed by a 128-bit insert.
1634 define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1635 ; AVX1-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
1637 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
1638 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1639 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1640 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1643 ; AVX2-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
1645 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
1646 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1647 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1648 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; High-half mask indices 16 and 24 correspond to 0 and 8 in the pattern above,
; which is relative to the extracted upper 128 bits.
1650 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1651 ret <32 x i8> %shuffle
; Byte shuffle of %a with undef mask lanes ("uu" in the function name). In the
; low half only lane 0 is defined, and it reads byte 0, i.e. it is already in
; place. In the high half the defined lanes read byte 16 or byte 28.
; Accordingly the expected code leaves the low half of ymm0 untouched: it
; extracts the high half, shuffles it with a single vpshufb, and inserts the
; result back into ymm0. No mask index is >= 32, so %b is never selected.
1654 define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_28_28_16_16(<32 x i8> %a, <32 x i8> %b) {
1655 ; AVX1-LABEL: @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_28_28_16_16
1657 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; Pattern positions 4-8 correspond to undef mask lanes, so the values shown
; there (12,12,13,13,0) are not dictated by the mask.
1658 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,0,0,0,12,12,13,13,0,0,0,0,12,12,0,0]
1659 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1662 ; AVX2-LABEL: @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_28_28_16_16
1664 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1665 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,0,0,0,12,12,13,13,0,0,0,0,12,12,0,0]
1666 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1668 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 28, i32 28, i32 16, i32 16>
1669 ret <32 x i8> %shuffle
; Byte shuffle of %a with undef mask lanes ("uu" in the function name). The
; defined low-half lanes read byte 14 (lane 1) or byte 0 (lanes 4-15); the
; defined high-half lanes read byte 16 or byte 30. No mask index is >= 32, so
; %b is never selected.
; The expected code shuffles each 128-bit half with its own vpshufb (the high
; half after a 128-bit extract) and recombines them with a 128-bit insert.
;
; NOTE(review): two defined lanes in the expected vpshufb patterns below do
; not appear to match the shufflevector mask -- TODO confirm against real llc
; output before relying on this test:
;   * result lane 1 requests byte 14, but the low-half pattern reads byte 0
;     at position 1 (xmm0[0,0,1,1,...]);
;   * result lane 19 requests byte 16 (byte 0 of the high half), but the
;     high-half pattern reads byte 1 at position 3 (xmm0[0,0,1,1,14,...]).
; Positions that correspond to undef mask lanes may hold any value; the two
; positions listed above are defined by the mask.
1672 define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_30(<32 x i8> %a, <32 x i8> %b) {
1673 ; AVX1-LABEL: @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_30
1675 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
1676 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1677 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,1,1,14,14,15,15,0,0,0,0,0,0,14,14]
1678 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1681 ; AVX2-LABEL: @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_30
1683 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
1684 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1685 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,1,1,14,14,15,15,0,0,0,0,0,0,14,14]
1686 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1688 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 undef, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 30>
1689 ret <32 x i8> %shuffle
; Byte shuffle of %a whose defined lanes follow a groups-of-four pattern
; (0,4,8,12 in the low half; 28,24,20,16 in the high half) with several lanes
; left undef ("uu" in the function name). No mask index is >= 32, so %b is
; never selected.
; The expected vpshufb patterns fill every undef lane with the value that
; completes its group of four, so each half is a full groups-of-four shuffle.
; Each half gets its own vpshufb (the high half after a 128-bit extract) and
; the halves are joined with a 128-bit insert.
1692 define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_04_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1693 ; AVX1-LABEL: @shuffle_v32i8_00_00_00_uu_uu_uu_04_04_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_24_24_20_20_20_20_16_16_16_16
1695 ; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1696 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1697 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
1698 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1701 ; AVX2-LABEL: @shuffle_v32i8_00_00_00_uu_uu_uu_04_04_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_24_24_20_20_20_20_16_16_16_16
1703 ; AVX2-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1704 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1705 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
1706 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1708 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 undef, i32 28, i32 28, i32 28, i32 28, i32 undef, i32 undef, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
1709 ret <32 x i8> %shuffle
; Byte shuffle of %a with undef mask lanes ("uu" in the function name). Low
; half: lanes 0-7 read byte 8, lanes 8-15 are undef. High half: lanes 16-18
; read byte 16, lanes 19-25 are undef, lanes 26-31 read byte 24. No mask index
; is >= 32, so %b is never selected.
; The expected low-half code does not use vpshufb: vpunpckhbw duplicates the
; upper eight bytes into byte pairs, then vpshuflw copies the first 16-bit
; word (bytes 8,8) across the low four words. The high half is extracted,
; shuffled with vpshufb, and re-inserted as in the neighbouring tests.
1712 define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1713 ; AVX1-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24
1715 ; AVX1-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; The vpshuflw indices below are 16-bit word positions, not byte positions.
1716 ; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1717 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1718 ; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,8,8,9,9,8,8,8,8,8,8,8,8]
1719 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1722 ; AVX2-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24
1724 ; AVX2-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1725 ; AVX2-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1726 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1727 ; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,8,8,9,9,8,8,8,8,8,8,8,8]
1728 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1730 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1731 ret <32 x i8> %shuffle