1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
; BB16: loads a single i8 from %ptr and inserts it into all 16 lanes of a <16 x i8>.
; The assertions expect the whole insertelement chain to fold into one
; vpbroadcastb from memory into %xmm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
4 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
6 ; CHECK: ## BB#0: ## %entry
7 ; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
10 %q = load i8, i8* %ptr, align 4
11 %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
12 %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
13 %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
14 %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
15 %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
16 %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
17 %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
18 %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
19 %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
20 %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
21 %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
22 %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
23 %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
24 %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
25 %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
26 %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
; BB32: loads a single i8 from %ptr and inserts it into all 32 lanes of a <32 x i8>.
; The assertions expect one vpbroadcastb from memory into %ymm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
30 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
32 ; CHECK: ## BB#0: ## %entry
33 ; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
36 %q = load i8, i8* %ptr, align 4
37 %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
38 %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
39 %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
40 %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
41 %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
42 %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
43 %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
44 %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
45 %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
46 %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
47 %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
48 %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
49 %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
50 %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
51 %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
52 %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
; Upper 16 lanes (indices 16-31) continue the same chain.
54 %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
55 %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
56 %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
57 %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
58 %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
59 %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
60 %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
61 %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
62 %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
63 %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
64 %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
65 %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
66 %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
67 %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
68 %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
69 %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
; W16: loads a single i16 from %ptr and inserts it into all 8 lanes of a <8 x i16>.
; The assertions expect one vpbroadcastw from memory into %xmm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
73 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
75 ; CHECK: ## BB#0: ## %entry
76 ; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
79 %q = load i16, i16* %ptr, align 4
80 %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
81 %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
82 %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
83 %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
84 %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
85 %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
86 %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
87 %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
; WW16: loads a single i16 from %ptr and inserts it into all 16 lanes of a <16 x i16>.
; The assertions expect one vpbroadcastw from memory into %ymm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
91 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
93 ; CHECK: ## BB#0: ## %entry
94 ; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
97 %q = load i16, i16* %ptr, align 4
98 %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
99 %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
100 %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
101 %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
102 %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
103 %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
104 %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
105 %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
106 %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
107 %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
108 %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
109 %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
110 %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
111 %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
112 %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
113 %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
; D32: loads a single i32 from %ptr and inserts it into all 4 lanes of a <4 x i32>.
; The assertions expect the floating-point-domain vbroadcastss from memory into
; %xmm0, even though the element type is an integer.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
117 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
119 ; CHECK: ## BB#0: ## %entry
120 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
123 %q = load i32, i32* %ptr, align 4
124 %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
125 %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
126 %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
127 %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
; DD32: loads a single i32 from %ptr and inserts it into all 8 lanes of a <8 x i32>.
; The assertions expect one vbroadcastss from memory into %ymm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
131 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
133 ; CHECK: ## BB#0: ## %entry
134 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
137 %q = load i32, i32* %ptr, align 4
138 %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
139 %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
140 %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
141 %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
142 %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
143 %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
144 %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
145 %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
; Q64: loads a single i64 from %ptr and inserts it into both lanes of a <2 x i64>.
; The assertions expect one vpbroadcastq from memory into %xmm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
149 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
151 ; CHECK: ## BB#0: ## %entry
152 ; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
155 %q = load i64, i64* %ptr, align 4
156 %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
157 %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
; QQ64: loads a single i64 from %ptr and inserts it into all 4 lanes of a <4 x i64>.
; The assertions expect the floating-point-domain vbroadcastsd from memory into %ymm0.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
161 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
163 ; CHECK: ## BB#0: ## %entry
164 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
167 %q = load i64, i64* %ptr, align 4
168 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
169 %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
170 %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
171 %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
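175 ; FIXME: Pointer-adjusted broadcasts: the splats of a non-zero element of a loaded vector below are not yet lowered to a broadcast from the element's address.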
; Loads a full <16 x i8> and splats element 1 across all 16 lanes.
; The assertions currently expect a whole-vector load followed by a vpshufb,
; not a broadcast from the element's address.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
177 define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
178 ; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
179 ; CHECK: ## BB#0: ## %entry
180 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
181 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
184 %ld = load <16 x i8>, <16 x i8>* %ptr
185 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a <16 x i8> and splats element 1 into a wider <32 x i8> result.
; The assertions currently expect a 128-bit load, a vpshufb splat within the
; xmm register, and a vinserti128 that duplicates it into the upper half.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
189 define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
190 ; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
191 ; CHECK: ## BB#0: ## %entry
192 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
193 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
194 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
197 %ld = load <16 x i8>, <16 x i8>* %ptr
198 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a full <32 x i8> and splats element 1 across all 32 lanes.
; The assertions currently expect a 256-bit load, a vpshufb on the low xmm
; half, and a vinserti128 that copies that half into the upper lanes.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
202 define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
203 ; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
204 ; CHECK: ## BB#0: ## %entry
205 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0
206 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
207 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
210 %ld = load <32 x i8>, <32 x i8>* %ptr
211 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a full <8 x i16> and splats element 1 across all 8 lanes.
; The assertions currently expect a whole-vector load plus a vpshufb that
; repeats bytes 2-3 (the second i16), not a broadcast from memory.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
215 define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
216 ; CHECK-LABEL: load_splat_8i16_8i16_11111111:
217 ; CHECK: ## BB#0: ## %entry
218 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
219 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
222 %ld = load <8 x i16>, <8 x i16>* %ptr
223 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a <8 x i16> and splats element 1 into a wider <16 x i16> result.
; The assertions currently expect a 128-bit load, a vpshufb repeating bytes
; 2-3, and a vinserti128 that duplicates the xmm into the upper half.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
227 define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
228 ; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
229 ; CHECK: ## BB#0: ## %entry
230 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
231 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
232 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
235 %ld = load <8 x i16>, <8 x i16>* %ptr
236 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a full <16 x i16> and splats element 1 across all 16 lanes.
; The assertions currently expect a 256-bit load, a vpshufb on the low xmm
; half repeating bytes 2-3, and a vinserti128 into the upper half.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
240 define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
241 ; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
242 ; CHECK: ## BB#0: ## %entry
243 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0
244 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
245 ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
248 %ld = load <16 x i16>, <16 x i16>* %ptr
249 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Loads a full <4 x i32> and splats element 1 across all 4 lanes.
; The assertions expect the load to be folded into a single vpshufd with a
; memory operand.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
253 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
254 ; CHECK-LABEL: load_splat_4i32_4i32_1111:
255 ; CHECK: ## BB#0: ## %entry
256 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
259 %ld = load <4 x i32>, <4 x i32>* %ptr
260 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; Loads a <4 x i32> and splats element 3 into a wider <8 x i32> result.
; The assertions currently expect a 128-bit load, a vpbroadcastd of a
; constant-pool value into %ymm1, and a vpermd that uses %ymm1 as the index vector.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
264 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
265 ; CHECK-LABEL: load_splat_8i32_4i32_33333333:
266 ; CHECK: ## BB#0: ## %entry
267 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
268 ; CHECK-NEXT: vpbroadcastd LCPI15_0(%rip), %ymm1
269 ; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
272 %ld = load <4 x i32>, <4 x i32>* %ptr
273 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads a full <8 x i32> and splats element 5 across all 8 lanes.
; The assertions currently expect a vpbroadcastd of a constant-pool value
; followed by a vpermd whose data operand is read directly from memory.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
277 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
278 ; CHECK-LABEL: load_splat_8i32_8i32_55555555:
279 ; CHECK: ## BB#0: ## %entry
280 ; CHECK-NEXT: vpbroadcastd LCPI16_0(%rip), %ymm0
281 ; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0
284 %ld = load <8 x i32>, <8 x i32>* %ptr
285 %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Loads a full <4 x float> and splats element 1 across all 4 lanes.
; The assertions expect the load to be folded into a single vpermilps with a
; memory operand.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
289 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
290 ; CHECK-LABEL: load_splat_4f32_4f32_1111:
291 ; CHECK: ## BB#0: ## %entry
292 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
295 %ld = load <4 x float>, <4 x float>* %ptr
296 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; Loads a <4 x float> and splats element 3 into a wider <8 x float> result.
; The assertions currently expect a 128-bit load, a vbroadcastss of a
; constant-pool value into %ymm1, and a vpermps that uses %ymm1 as the index vector.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
300 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
301 ; CHECK-LABEL: load_splat_8f32_4f32_33333333:
302 ; CHECK: ## BB#0: ## %entry
303 ; CHECK-NEXT: vmovaps (%rdi), %xmm0
304 ; CHECK-NEXT: vbroadcastss LCPI18_0(%rip), %ymm1
305 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
308 %ld = load <4 x float>, <4 x float>* %ptr
309 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loads a full <8 x float> and splats element 5 across all 8 lanes.
; The assertions currently expect a vbroadcastss of a constant-pool value
; followed by a vpermps whose data operand is read directly from memory.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
313 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
314 ; CHECK-LABEL: load_splat_8f32_8f32_55555555:
315 ; CHECK: ## BB#0: ## %entry
316 ; CHECK-NEXT: vbroadcastss LCPI19_0(%rip), %ymm0
317 ; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
320 %ld = load <8 x float>, <8 x float>* %ptr
321 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Loads a full <2 x i64> and splats element 1 into both lanes.
; The assertions expect a single vpshufd with a memory operand that repeats
; dwords 2-3 (the upper i64).
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
325 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
326 ; CHECK-LABEL: load_splat_2i64_2i64_1111:
327 ; CHECK: ## BB#0: ## %entry
328 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
331 %ld = load <2 x i64>, <2 x i64>* %ptr
332 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Loads a <2 x i64> and splats element 1 into a wider <4 x i64> result.
; The assertions currently expect a 128-bit load followed by a vpermq on the
; ymm register.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
336 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
337 ; CHECK-LABEL: load_splat_4i64_2i64_1111:
338 ; CHECK: ## BB#0: ## %entry
339 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
340 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
343 %ld = load <2 x i64>, <2 x i64>* %ptr
344 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; Loads a full <4 x i64> and splats element 2 across all 4 lanes.
; The assertions expect the load to be folded into a single vpermq with a
; memory operand.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
348 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
349 ; CHECK-LABEL: load_splat_4i64_4i64_2222:
350 ; CHECK: ## BB#0: ## %entry
351 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = mem[2,2,2,2]
354 %ld = load <4 x i64>, <4 x i64>* %ptr
355 %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
; Loads a full <2 x double> and returns a splat of element 1 in both lanes.
; The assertions currently expect a whole-vector load followed by a vmovhlps
; that duplicates the high half.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
359 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
360 ; CHECK-LABEL: load_splat_2f64_2f64_1111:
361 ; CHECK: ## BB#0: ## %entry
362 ; CHECK-NEXT: vmovaps (%rdi), %xmm0
363 ; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
366 %ld = load <2 x double>, <2 x double>* %ptr
367 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
368 ret <2 x double> %ret
; Loads a <2 x double> and returns a splat of element 1 widened to <4 x double>.
; The assertions currently expect a 128-bit load followed by a vpermpd on the
; ymm register.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
371 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
372 ; CHECK-LABEL: load_splat_4f64_2f64_1111:
373 ; CHECK: ## BB#0: ## %entry
374 ; CHECK-NEXT: vmovapd (%rdi), %xmm0
375 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
378 %ld = load <2 x double>, <2 x double>* %ptr
379 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
380 ret <4 x double> %ret
; Loads a full <4 x double> and returns a splat of element 2 in all 4 lanes.
; The assertions expect the load to be folded into a single vpermpd with a
; memory operand.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
383 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
384 ; CHECK-LABEL: load_splat_4f64_4f64_2222:
385 ; CHECK: ## BB#0: ## %entry
386 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,2,2,2]
389 %ld = load <4 x double>, <4 x double>* %ptr
390 %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
391 ret <4 x double> %ret
394 ; Make sure that we still don't support broadcasting a double into a 128-bit vector.
; I: loads one double from %ptr, inserts it into both lanes of a <2 x double>
; and returns the result. The assertions expect a vmovddup from memory rather
; than a vbroadcastsd.
; NOTE(review): the function is marked readnone yet loads through %ptr -- confirm the attribute is intended.
396 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
398 ; CHECK: ## BB#0: ## %entry
399 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
402 %q = load double, double* %ptr, align 4
403 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
404 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
405 ret <2 x double> %vecinit2.i
; V111: adds the constant splat <1 x 8> to the <8 x i32> argument.
; The assertions expect the constant to be materialized with a vpbroadcastd
; from a rip-relative address and then added with vpaddd.
408 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
410 ; CHECK: ## BB#0: ## %entry
411 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
412 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
415 %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; V113: adds a constant float splat to the <8 x float> argument. The constant
; 0xbf80000000000000 is the hexadecimal double encoding of -0.0078125 (-2^-7).
; The assertions expect the constant to be materialized with a vbroadcastss
; from a rip-relative address and then added with vaddps.
419 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
421 ; CHECK: ## BB#0: ## %entry
422 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
423 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
426 %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
; _e2: builds a <4 x float> with the same constant in every lane and returns
; it; the %ptr argument is not used by the visible body. The assertions expect
; the constant splat to be produced by a vbroadcastss from a rip-relative address.
430 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
433 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
435 %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
436 %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
437 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
438 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
439 ret <4 x float> %vecinit6.i
; _e4: builds a <8 x i8> with the constant 52 in every lane and returns it;
; the %ptr argument is not used by the visible body. The assertions expect a
; plain vmovaps of an 8-element constant vector, not a broadcast instruction.
442 define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
445 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
447 %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
448 %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
449 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
450 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
451 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
452 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
453 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
454 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
455 ret <8 x i8> %vecinit7.i
; crash: a control-flow skeleton whose branch conditions are undef and whose
; vector arithmetic (fmul / fptosi / and / mul on undef operands, plus a
; zero-mask splat of a scalar) feeds no visible use. The visible assertions
; cover only scalar control flow: xorl/testb/je and a block that jumps to itself.
; NOTE(review): the basic-block label lines are not visible in this view; the
; block names below are known only from the branch targets and assertion comments.
459 define void @crash() nounwind alwaysinline {
460 ; CHECK-LABEL: crash:
461 ; CHECK: ## BB#0: ## %WGLoopsEntry
462 ; CHECK-NEXT: xorl %eax, %eax
463 ; CHECK-NEXT: testb %al, %al
464 ; CHECK-NEXT: je LBB31_1
465 ; CHECK-NEXT: ## BB#2: ## %ret
467 ; CHECK-NEXT: .align 4, 0x90
468 ; CHECK-NEXT: LBB31_1: ## %footer349VF
469 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
470 ; CHECK-NEXT: jmp LBB31_1
472 br i1 undef, label %ret, label %footer329VF
475 %A.0.inVF = fmul float undef, 6.553600e+04
476 %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
477 %A.0VF = fptosi float %A.0.inVF to i32
478 %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
479 %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
480 %1 = and i32 %A.0VF, 65535
; Scalar-to-vector splat: insert into lane 0, then shuffle with an all-zero mask.
481 %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
482 %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
483 br i1 undef, label %preload1201VF, label %footer349VF
486 br label %footer349VF
489 %2 = mul nsw <8 x i32> undef, %0
490 %3 = mul nsw <8 x i32> undef, %vector1099VF
491 br label %footer329VF
; _inreg0: splats an i32 scalar argument across a <8 x i32> (insert into lane 0,
; then shuffle with an all-zero mask). The assertions expect the GPR to be
; moved into %xmm0 with vmovd and then broadcast register-to-register with vbroadcastss.
497 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
498 ; CHECK-LABEL: _inreg0:
500 ; CHECK-NEXT: vmovd %edi, %xmm0
501 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
503 %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
504 %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
; _inreg1: splats a float scalar argument across a <8 x float> and returns it.
; The assertions expect a single register-to-register vbroadcastss into %ymm0.
508 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
509 ; CHECK-LABEL: _inreg1:
511 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
513 %in = insertelement <8 x float> undef, float %scalar, i32 0
514 %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
515 ret <8 x float> %wide
; _inreg2: splats a float scalar argument across a <4 x float> and returns it.
; The assertions expect a single register-to-register vbroadcastss into %xmm0.
518 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
519 ; CHECK-LABEL: _inreg2:
521 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
523 %in = insertelement <4 x float> undef, float %scalar, i32 0
524 %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
525 ret <4 x float> %wide
; _inreg3: splats a double scalar argument across a <4 x double> and returns it.
; The assertions expect a single register-to-register vbroadcastsd into %ymm0.
528 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
529 ; CHECK-LABEL: _inreg3:
531 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
533 %in = insertelement <4 x double> undef, double %scalar, i32 0
534 %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
535 ret <4 x double> %wide
; _inreg8xfloat: splats lane 0 of a <8 x float> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vbroadcastss into %ymm0.
538 define <8 x float> @_inreg8xfloat(<8 x float> %a) {
539 ; CHECK-LABEL: _inreg8xfloat:
541 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
543 %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
; _inreg4xfloat: splats lane 0 of a <4 x float> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vbroadcastss into %xmm0.
547 define <4 x float> @_inreg4xfloat(<4 x float> %a) {
548 ; CHECK-LABEL: _inreg4xfloat:
550 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
552 %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
; _inreg16xi16: splats lane 0 of a <16 x i16> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vpbroadcastw into %ymm0.
556 define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
557 ; CHECK-LABEL: _inreg16xi16:
559 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
561 %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
; _inreg8xi16: splats lane 0 of a <8 x i16> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vpbroadcastw into %xmm0.
565 define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
566 ; CHECK-LABEL: _inreg8xi16:
568 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
570 %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
; _inreg4xi64: splats lane 0 of a <4 x i64> argument (all-zero shuffle mask).
; The assertions expect the floating-point-domain vbroadcastsd into %ymm0.
574 define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
575 ; CHECK-LABEL: _inreg4xi64:
577 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
579 %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
; _inreg2xi64: splats lane 0 of a <2 x i64> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vpbroadcastq into %xmm0.
583 define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
584 ; CHECK-LABEL: _inreg2xi64:
586 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
588 %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
; _inreg4xdouble: splats lane 0 of a <4 x double> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vbroadcastsd into %ymm0.
592 define <4 x double> @_inreg4xdouble(<4 x double> %a) {
593 ; CHECK-LABEL: _inreg4xdouble:
595 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
597 %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
; _inreg2xdouble: splats lane 0 of a <2 x double> argument (all-zero shuffle mask).
; The assertions expect a vmovddup rather than a broadcast instruction.
601 define <2 x double> @_inreg2xdouble(<2 x double> %a) {
602 ; CHECK-LABEL: _inreg2xdouble:
604 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
606 %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
; _inreg8xi32: splats lane 0 of a <8 x i32> argument (all-zero shuffle mask).
; The assertions expect the floating-point-domain vbroadcastss into %ymm0.
610 define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
611 ; CHECK-LABEL: _inreg8xi32:
613 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
615 %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
; _inreg4xi32: splats lane 0 of a <4 x i32> argument (all-zero shuffle mask).
; The assertions expect the floating-point-domain vbroadcastss into %xmm0.
619 define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
620 ; CHECK-LABEL: _inreg4xi32:
622 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
624 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
; _inreg32xi8: splats lane 0 of a <32 x i8> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vpbroadcastb into %ymm0.
628 define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
629 ; CHECK-LABEL: _inreg32xi8:
631 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
633 %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
; _inreg16xi8: splats lane 0 of a <16 x i8> argument (all-zero shuffle mask).
; The assertions expect a register-to-register vpbroadcastb into %xmm0.
637 define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
638 ; CHECK-LABEL: _inreg16xi8:
640 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
642 %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
646 ; These tests check that a vbroadcast instruction is used when we have a splat
647 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
648 ; (via the insertelements).
; splat_concat1: builds a <4 x float> splat of %f, then widens it to 8 lanes
; with a shuffle that repeats lanes 0-3 twice (the vector concatenated with itself).
; The assertions expect the whole pattern to collapse to one vbroadcastss into %ymm0.
650 define <8 x float> @splat_concat1(float %f) {
651 ; CHECK-LABEL: splat_concat1:
653 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
655 %1 = insertelement <4 x float> undef, float %f, i32 0
656 %2 = insertelement <4 x float> %1, float %f, i32 1
657 %3 = insertelement <4 x float> %2, float %f, i32 2
658 %4 = insertelement <4 x float> %3, float %f, i32 3
659 %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; splat_concat2: builds two separate <4 x float> splats of %f and concatenates
; them with an identity shuffle over both operands (indices 0-7).
; The assertions expect the whole pattern to collapse to one vbroadcastss into %ymm0.
663 define <8 x float> @splat_concat2(float %f) {
664 ; CHECK-LABEL: splat_concat2:
666 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; First 4-lane splat.
668 %1 = insertelement <4 x float> undef, float %f, i32 0
669 %2 = insertelement <4 x float> %1, float %f, i32 1
670 %3 = insertelement <4 x float> %2, float %f, i32 2
671 %4 = insertelement <4 x float> %3, float %f, i32 3
; Second, independently built 4-lane splat of the same scalar.
672 %5 = insertelement <4 x float> undef, float %f, i32 0
673 %6 = insertelement <4 x float> %5, float %f, i32 1
674 %7 = insertelement <4 x float> %6, float %f, i32 2
675 %8 = insertelement <4 x float> %7, float %f, i32 3
676 %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; splat_concat3: builds a <2 x double> splat of %d, then widens it to 4 lanes
; with a shuffle that repeats lanes 0-1 twice (the vector concatenated with itself).
; The assertions expect the whole pattern to collapse to one vbroadcastsd into %ymm0.
680 define <4 x double> @splat_concat3(double %d) {
681 ; CHECK-LABEL: splat_concat3:
683 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
685 %1 = insertelement <2 x double> undef, double %d, i32 0
686 %2 = insertelement <2 x double> %1, double %d, i32 1
687 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; splat_concat4: builds two separate <2 x double> splats of %d and concatenates
; them with an identity shuffle over both operands (indices 0-3).
; The assertions expect the whole pattern to collapse to one vbroadcastsd into %ymm0.
691 define <4 x double> @splat_concat4(double %d) {
692 ; CHECK-LABEL: splat_concat4:
694 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
696 %1 = insertelement <2 x double> undef, double %d, i32 0
697 %2 = insertelement <2 x double> %1, double %d, i32 1
698 %3 = insertelement <2 x double> undef, double %d, i32 0
699 %4 = insertelement <2 x double> %3, double %d, i32 1
700 %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
704 ; Test cases for <rdar://problem/16074331>.
705 ; Instruction selection for the broadcast instruction fails if
706 ; the load cannot be folded into the broadcast.
707 ; This happens if the load initially has one use but other uses are
708 ; created later, or if the selection DAG cannot prove that folding the
709 ; load will not create a cycle in the DAG.
710 ; These test cases exercise the latter.
; isel_crash_16b: splats an i8 loaded from %cV_R.addr across <16 x i8>, bitcasts
; it to <2 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vpbroadcastb into an xmm register is emitted; the source
; operand is left unconstrained.
712 ; CHECK-LABEL: isel_crash_16b
713 ; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
715 define void @isel_crash_16b(i8* %cV_R.addr) {
717 %__a.addr.i = alloca <2 x i64>, align 16
718 %__b.addr.i = alloca <2 x i64>, align 16
719 %vCr = alloca <2 x i64>, align 16
720 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
721 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
722 %tmp2 = load i8, i8* %cV_R.addr, align 4
723 %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
724 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
725 %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
726 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
727 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; isel_crash_32b: splats an i8 loaded from %cV_R.addr across <32 x i8>, bitcasts
; it to <4 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vpbroadcastb into a ymm register is emitted.
731 ; CHECK-LABEL: isel_crash_32b
732 ; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
734 define void @isel_crash_32b(i8* %cV_R.addr) {
736 %__a.addr.i = alloca <4 x i64>, align 16
737 %__b.addr.i = alloca <4 x i64>, align 16
738 %vCr = alloca <4 x i64>, align 16
739 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
740 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
741 %tmp2 = load i8, i8* %cV_R.addr, align 4
742 %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
743 %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
744 %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
745 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
746 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; isel_crash_8w: splats an i16 loaded from %cV_R.addr across <8 x i16>, bitcasts
; it to <2 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vpbroadcastw into an xmm register is emitted.
750 ; CHECK-LABEL: isel_crash_8w
751 ; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
753 define void @isel_crash_8w(i16* %cV_R.addr) {
755 %__a.addr.i = alloca <2 x i64>, align 16
756 %__b.addr.i = alloca <2 x i64>, align 16
757 %vCr = alloca <2 x i64>, align 16
758 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
759 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
760 %tmp2 = load i16, i16* %cV_R.addr, align 4
761 %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
762 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
763 %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
764 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
765 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; isel_crash_16w: splats an i16 loaded from %cV_R.addr across <16 x i16>,
; bitcasts it to <4 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vpbroadcastw into a ymm register is emitted.
769 ; CHECK-LABEL: isel_crash_16w
770 ; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
772 define void @isel_crash_16w(i16* %cV_R.addr) {
774 %__a.addr.i = alloca <4 x i64>, align 16
775 %__b.addr.i = alloca <4 x i64>, align 16
776 %vCr = alloca <4 x i64>, align 16
777 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
778 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
779 %tmp2 = load i16, i16* %cV_R.addr, align 4
780 %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
781 %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
782 %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
783 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
784 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; isel_crash_4d: splats an i32 loaded from %cV_R.addr across <4 x i32>, bitcasts
; it to <2 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vbroadcastss into an xmm register is emitted.
788 ; CHECK-LABEL: isel_crash_4d
789 ; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
791 define void @isel_crash_4d(i32* %cV_R.addr) {
793 %__a.addr.i = alloca <2 x i64>, align 16
794 %__b.addr.i = alloca <2 x i64>, align 16
795 %vCr = alloca <2 x i64>, align 16
796 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
797 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
798 %tmp2 = load i32, i32* %cV_R.addr, align 4
799 %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
800 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
801 %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
802 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
803 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; isel_crash_8d: splats an i32 loaded from %cV_R.addr across <8 x i32>, bitcasts
; it to <4 x i64> and stores it to a stack slot, next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vbroadcastss into a ymm register is emitted.
807 ; CHECK-LABEL: isel_crash_8d
808 ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
810 define void @isel_crash_8d(i32* %cV_R.addr) {
812 %__a.addr.i = alloca <4 x i64>, align 16
813 %__b.addr.i = alloca <4 x i64>, align 16
814 %vCr = alloca <4 x i64>, align 16
815 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
816 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
817 %tmp2 = load i32, i32* %cV_R.addr, align 4
818 %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
819 %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
820 %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
821 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
822 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; isel_crash_2q: splats an i64 loaded from %cV_R.addr across <2 x i64> and
; stores it to a stack slot (no bitcast is needed here), next to an unrelated
; store/reload of a zero vector through another slot. The assertions only
; require that some vpbroadcastq into an xmm register is emitted.
826 ; CHECK-LABEL: isel_crash_2q
827 ; CHECK: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
829 define void @isel_crash_2q(i64* %cV_R.addr) {
831 %__a.addr.i = alloca <2 x i64>, align 16
832 %__b.addr.i = alloca <2 x i64>, align 16
833 %vCr = alloca <2 x i64>, align 16
834 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
835 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
836 %tmp2 = load i64, i64* %cV_R.addr, align 4
837 %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
838 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
839 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
840 store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
844 ; CHECK-LABEL: isel_crash_4q
845 ; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
847 define void @isel_crash_4q(i64* %cV_R.addr) {
849 %__a.addr.i = alloca <4 x i64>, align 16
850 %__b.addr.i = alloca <4 x i64>, align 16
851 %vCr = alloca <4 x i64>, align 16
852 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
853 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
854 %tmp2 = load i64, i64* %cV_R.addr, align 4
855 %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
856 %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
857 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
858 store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16