; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 | FileCheck %s --check-prefix=SSE1

; Codegen checks for <4 x float> shufflevector lowering on x86-64 when SSE2 is
; disabled via -mattr=-sse2.
target triple = "x86_64-unknown-unknown"
; Permute of %a with mask <0,0,0,1> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0001:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <0,0,2,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0020:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <0,3,0,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0300:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <1,0,0,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_1000:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <2,2,0,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2200:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <3,3,3,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_3330:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x float> %shuffle
}
; Full lane reversal of %a, mask <3,2,1,0> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_3210:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <0,0,1,1> (no lane of %b is selected).
; Expected lowering: unpcklps of xmm0 with itself rather than shufps.
define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0011:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <2,2,3,3> (no lane of %b is selected).
; Expected lowering: unpckhps of xmm0 with itself rather than shufps.
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2233:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <0,0,2,2> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0022:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}
; Permute of %a with mask <1,1,3,3> (no lane of %b is selected).
; Expected lowering: one in-place shufps on xmm0.
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_1133:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}
; Blend with a zero vector: lane 0 takes %a[0] (mask index 4), lanes 1-3 stay
; zero. Expected lowering: zero xmm1, movss the low element of xmm0 into it,
; then copy the result back to xmm0.
define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_4zzz:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle
}
; Blend with a zero vector: lane 1 takes %a[0] (mask index 4), every other lane
; selects a zero element. Expected lowering: zero xmm1 followed by two shufps
; that leave the result in xmm0.
define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_z4zz:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
  ret <4 x float> %shuffle
}
; Blend with a zero vector: lane 2 takes %a[0] (mask index 4), every other lane
; selects a zero element. Expected lowering: zero xmm1, two shufps building the
; result in xmm1, then a copy back to xmm0.
define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zz4z:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
  ret <4 x float> %shuffle
}
; Blend with a zero vector where lanes 1-2 are undef: lane 0 is zero and lane 3
; takes %a[0] (mask index 4). Expected lowering: a single shufps into the
; zeroed xmm1, then a copy back to xmm0.
define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zuu4:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
  ret <4 x float> %shuffle
}
; Blend with a zero vector: lanes 0-2 are zero and lane 3 takes %a[3] (mask
; index 7). Expected lowering: zero xmm1, two shufps building the result in
; xmm1, then a copy back to xmm0.
define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zzz7:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %shuffle
}
; Blend with a zero vector: lane 1 takes %a[2] (mask index 6), every other lane
; is zero. Expected lowering: zero xmm1 followed by two shufps that leave the
; result in xmm0.
define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_z6zz:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE1-NEXT:    retq
  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  ret <4 x float> %shuffle
}
; Inserts the scalar argument %a into lane 0 and fills lanes 1-3 from a zero
; vector. Expected lowering: zero xmm1, movss the low element of xmm0 into it,
; then copy the result back to xmm0.
define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; SSE1-LABEL: insert_reg_and_zero_v4f32:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    xorps %xmm1, %xmm1
; SSE1-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuffle
}
; Loads a float from %ptr into lane 0 and fills lanes 1-3 from a zero vector.
; Expected lowering: a single movss load, whose printed shuffle comment shows
; the upper lanes as zero.
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
; SSE1-LABEL: insert_mem_and_zero_v4f32:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE1-NEXT:    retq
  %a = load float, float* %ptr
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuffle
}
; Loads a <2 x float> from %ptr into lanes 0-1 and keeps lanes 2-3 of %b.
; The expected sequence reads the 64 bits through %rax, stores the two 32-bit
; halves to stack slots, reloads them with movss, joins them with unpcklps,
; and finally merges in the high half of xmm0 with shufps.
define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-LABEL: insert_mem_lo_v4f32:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    movq (%rdi), %rax
; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT:    shrq $32, %rax
; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT:    xorps %xmm2, %xmm2
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE1-NEXT:    movaps %xmm1, %xmm0
; SSE1-NEXT:    retq
  %a = load <2 x float>, <2 x float>* %ptr
  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %shuffle
}
; Loads a <2 x float> from %ptr into lanes 2-3 and keeps lanes 0-1 of %b in
; lanes 0-1. The expected sequence reads the 64 bits through %rax, stores the
; two 32-bit halves to stack slots, reloads them with movss, joins them with
; unpcklps, and finally appends them to the low half of xmm0 with shufps.
define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-LABEL: insert_mem_hi_v4f32:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    movq (%rdi), %rax
; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT:    shrq $32, %rax
; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT:    xorps %xmm2, %xmm2
; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
; SSE1-NEXT:    retq
  %a = load <2 x float>, <2 x float>* %ptr
  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x float> %shuffle
}
; Full lane reversal (mask <3,2,1,0>) of a vector loaded from %ptr.
; Expected lowering: a movaps load into xmm0 followed by one in-place shufps.
define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
; SSE1-LABEL: shuffle_mem_v4f32_3210:
; SSE1:       # {{BB#0|%bb\.0}}:
; SSE1-NEXT:    movaps (%rdi), %xmm0
; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE1-NEXT:    retq
  %a = load <4 x float>, <4 x float>* %ptr
  %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %shuffle
}