; Code generation test for <16 x i8> shufflevector lowering on x86-64.
; The lit command below runs llc with -mcpu=x86-64 and the experimental vector
; shuffle lowering flag, then pipes the assembly to FileCheck using the SSE2
; check prefix.
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
; Module-level data layout and triple; the triple matches the -mtriple value
; passed on the command line above.
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-unknown"
; Splat of element 0: every one of the 16 result bytes is byte 0 of %a.
; All mask indices are below 16, so %b is never selected.
; Expected lowering: duplicate the low bytes into byte pairs (punpcklbw with
; itself), then broadcast the first 16-bit word over the low half (pshuflw)
; and the high half (pshufhw).
6 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
7 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
9 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; The dword shuffle copies dword 0 into dword 2 so that word 4 also holds the
; duplicated byte 0 before the high-half word shuffle reads it.
10 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
11 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
12 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
13 ; CHECK-SSE2-NEXT: retq
14 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
15 ret <16 x i8> %shuffle
; Two-way splat: result bytes 0-7 are byte 0 of %a and result bytes 8-15 are
; byte 1 of %a. %b is never selected.
; Expected lowering is the same four-instruction shape as the element-0 splat
; test; only the final high-half word shuffle differs (it broadcasts word 5,
; which holds the duplicated byte 1, instead of word 4).
18 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
19 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
21 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
22 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
23 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
24 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
25 ; CHECK-SSE2-NEXT: retq
26 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
27 ret <16 x i8> %shuffle
; Two-way splat across halves: result bytes 0-7 are byte 0 of %a and result
; bytes 8-15 are byte 8 of %a. %b is never selected.
; Expected lowering has two phases:
;   1. Widen both halves of %a to words against a zeroed register, interleave
;      the words so elements 0 and 8 become neighbours, and pack back to bytes.
;   2. Apply the same duplicate-and-broadcast tail used by the 00.../01... test.
30 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
31 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
; xmm1 is zeroed and used as the high-byte filler for the unpacks below.
33 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
34 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
35 ; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
36 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
37 ; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
38 ; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
; From here on the sequence matches the splat tail: byte pairs, then word
; broadcasts in the low and high halves.
39 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
40 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
41 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
42 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
43 ; CHECK-SSE2-NEXT: retq
44 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
45 ret <16 x i8> %shuffle
; Four-way replicate: each of bytes 0, 1, 2 and 3 of %a is repeated four times
; in order. %b is never selected.
; Expected lowering: duplicate the low bytes into byte pairs, then double each
; of the first four words using one dword shuffle plus a low-half and a
; high-half word shuffle.
48 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
49 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
51 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
; Dword 1 (words 2 and 3) is copied into dword 3 so the high-half shuffle can
; reach it as words 6 and 7.
52 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
53 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,1,1,4,5,6,7]
54 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,7,7]
55 ; CHECK-SSE2-NEXT: retq
56 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
57 ret <16 x i8> %shuffle
; Strided four-way replicate: bytes 0, 4, 8 and 12 of %a are each repeated four
; times in order. %b is never selected.
; Expected lowering has three phases:
;   1. Widen the low half (into xmm2) and high half (in xmm0) of %a to words
;      against a zeroed xmm1 and compact the wanted elements toward the front.
;   2. Interleave the two halves, reorder the words, and pack back to bytes.
;   3. Duplicate bytes into pairs and double the words to get the 4x repeats.
60 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
61 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
; Phase 1: low half of %a widened in xmm2, high half widened in xmm0.
63 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
64 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
65 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm2
66 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm2[0,2,2,3]
67 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[0,2,2,3,4,5,6,7]
68 ; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm0
69 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
70 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
; Phase 2: merge the two halves and narrow back to bytes.
71 ; CHECK-SSE2-NEXT: punpcklwd %xmm2, %xmm0
72 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
73 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7]
74 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,7]
75 ; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
; Phase 3: replicate each selected byte four times.
76 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
77 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,1]
78 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
79 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,6]
80 ; CHECK-SSE2-NEXT: retq
81 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
82 ret <16 x i8> %shuffle
; Pairwise duplicate: each of bytes 0-7 of %a appears twice in order. %b is
; never selected.
; The mask is exactly a low-byte interleave of %a with itself, so the expected
; lowering is a single punpcklbw followed by the return.
85 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
86 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
88 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
89 ; CHECK-SSE2-NEXT: retq
90 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
91 ret <16 x i8> %shuffle
; Byte-pair splat: the pair (byte 0, byte 1) of %a is repeated eight times,
; i.e. the first 16-bit word is broadcast. %b is never selected.
; Because the repeated unit is a whole word, no byte unpack is expected: only
; the dword shuffle and the low/high word broadcasts appear.
; The first instruction uses a plain (non-NEXT) match, so other output lines
; are allowed between the function label and the pshufd.
94 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
95 ; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
96 ; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
97 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
98 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
99 ; CHECK-SSE2-NEXT: retq
100 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
101 ret <16 x i8> %shuffle
; Two-input interleave: even result bytes are bytes 0-7 of %a, odd result bytes
; are bytes 0-7 of %b (mask indices 16-23 select from the second operand).
; This is exactly a low-byte unpack of xmm0 with xmm1, so a single punpcklbw
; followed by the return is expected.
104 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
105 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
106 ; CHECK-SSE2: punpcklbw %xmm1, %xmm0
107 ; CHECK-SSE2-NEXT: retq
108 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
109 ret <16 x i8> %shuffle
; Splat-and-interleave: even result bytes are all byte 0 of %b (mask index 16),
; odd result bytes are bytes 0-7 of %a in order.
; Expected lowering: broadcast byte 0 of %b over the low eight bytes of xmm1
; (byte duplicate + low-half word broadcast), interleave with the low bytes of
; xmm0, then copy the result from xmm1 into xmm0 before returning.
112 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
113 ; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
; Anchors the sequence at the entry basic-block marker in the llc output.
114 ; CHECK-SSE2: # BB#0:
115 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1
116 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
117 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1
; The interleave wrote its result to xmm1; this copy places it in xmm0.
118 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
119 ; CHECK-SSE2-NEXT: retq
120 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
121 ret <16 x i8> %shuffle
; Per-group byte reversal: each aligned group of four bytes of %a is reversed
; (3,2,1,0 / 7,6,5,4 / 11,10,9,8 / 15,14,13,12). %b is never selected.
; Expected lowering: widen the high half (into xmm2) and low half (in xmm0) of
; %a to words against a zeroed xmm1, reverse each group of four words with a
; low-half and a high-half word shuffle, then pack both registers back to
; bytes.
124 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
125 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
126 ; CHECK-SSE2: pxor %xmm1, %xmm1
127 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
128 ; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
129 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
130 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
131 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
132 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
133 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; Pack places xmm0's words in result bytes 0-7 and xmm2's in bytes 8-15.
134 ; CHECK-SSE2-NEXT: packuswb %xmm2, %xmm0
135 ; CHECK-SSE2-NEXT: retq
136 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
137 ret <16 x i8> %shuffle
; Two-input per-group byte reversal: result bytes 0-7 are the two reversed
; 4-byte groups of the low half of %a; result bytes 8-15 are the two reversed
; 4-byte groups of the low half of %b (mask indices 16-23).
; Expected lowering: widen the low halves of xmm1 and xmm0 to words against a
; zeroed xmm2, reverse each group of four words, then pack both back to bytes.
; Only low halves are used, so no register copy or high unpack is expected.
140 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
141 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
142 ; CHECK-SSE2: pxor %xmm2, %xmm2
143 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1
144 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
145 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
146 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
147 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
148 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
149 ; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0
150 ; CHECK-SSE2-NEXT: retq
151 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
152 ret <16 x i8> %shuffle
; Mixed two-input per-group byte reversal. The four reversed 4-byte groups of
; the result come from, in order: %a bytes 0-3, %b bytes 12-15 (indices 28-31),
; %a bytes 8-11, and %b bytes 4-7 (indices 20-23).
; Expected lowering: widen all four halves (low/high of %a and %b) to words
; against a zeroed xmm2, reverse only the word group each one contributes,
; blend matching 64-bit halves with shufpd, and pack the two blended registers
; back to bytes.
155 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
156 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
157 ; CHECK-SSE2: pxor %xmm2, %xmm2
; xmm3/xmm4 build the upper eight result bytes (11,10,9,8,23,22,21,20).
158 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
159 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm3
160 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
161 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
162 ; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm4
163 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
164 ; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
; xmm0/xmm1 build the lower eight result bytes (3,2,1,0,31,30,29,28).
165 ; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm1
166 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
167 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
168 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
169 ; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
170 ; CHECK-SSE2-NEXT: packuswb %xmm4, %xmm0
171 ; CHECK-SSE2-NEXT: retq
172 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
173 ret <16 x i8> %shuffle
; Shuffle-expressed zero extension to 16-bit lanes: even result bytes are bytes
; 0-7 of %a and every odd result byte comes from the zeroinitializer operand
; (mask indices 17, 19, ..., 31), so each source byte is followed by a zero.
; Expected lowering: zero a scratch register and interleave the low bytes of
; xmm0 with it. Only these two instructions are checked; the return is not.
176 define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
177 ; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
178 ; CHECK-SSE2: pxor %xmm1, %xmm1
179 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
180 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
181 ret <16 x i8> %shuffle
; Shuffle-expressed zero extension to 32-bit lanes: result bytes 0, 4, 8 and 12
; are bytes 0-3 of %a and all other result bytes come from the zeroinitializer
; operand, so each source byte is followed by three zeros.
; Expected lowering: zero a scratch register, then interleave the low bytes of
; xmm0 with it twice. The second byte interleave is sufficient because every
; other byte is already zero after the first one.
; Only these three instructions are checked; the return is not.
184 define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
185 ; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
186 ; CHECK-SSE2: pxor %xmm1, %xmm1
187 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
188 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
189 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
190 ret <16 x i8> %shuffle