1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
4 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
6 ; CHECK: ## BB#0: ## %entry
7 ; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
10 %q = load i8, i8* %ptr, align 4
11 %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
12 %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
13 %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
14 %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
15 %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
16 %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
17 %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
18 %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
19 %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
20 %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
21 %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
22 %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
23 %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
24 %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
25 %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
26 %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
30 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
32 ; CHECK: ## BB#0: ## %entry
33 ; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
36 %q = load i8, i8* %ptr, align 4
37 %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
38 %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
39 %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
40 %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
41 %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
42 %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
43 %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
44 %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
45 %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
46 %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
47 %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
48 %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
49 %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
50 %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
51 %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
52 %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
54 %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
55 %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
56 %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
57 %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
58 %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
59 %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
60 %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
61 %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
62 %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
63 %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
64 %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
65 %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
66 %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
67 %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
68 %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
69 %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
73 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
75 ; CHECK: ## BB#0: ## %entry
76 ; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
79 %q = load i16, i16* %ptr, align 4
80 %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
81 %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
82 %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
83 %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
84 %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
85 %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
86 %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
87 %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
91 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
93 ; CHECK: ## BB#0: ## %entry
94 ; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
97 %q = load i16, i16* %ptr, align 4
98 %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
99 %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
100 %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
101 %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
102 %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
103 %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
104 %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
105 %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
106 %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
107 %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
108 %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
109 %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
110 %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
111 %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
112 %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
113 %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
117 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
119 ; CHECK: ## BB#0: ## %entry
120 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
123 %q = load i32, i32* %ptr, align 4
124 %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
125 %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
126 %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
127 %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
131 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
133 ; CHECK: ## BB#0: ## %entry
134 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
137 %q = load i32, i32* %ptr, align 4
138 %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
139 %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
140 %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
141 %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
142 %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
143 %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
144 %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
145 %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
149 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
151 ; CHECK: ## BB#0: ## %entry
152 ; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
155 %q = load i64, i64* %ptr, align 4
156 %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
157 %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
161 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
163 ; CHECK: ## BB#0: ## %entry
164 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
167 %q = load i64, i64* %ptr, align 4
168 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
169 %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
170 %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
171 %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
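175 ; FIXME: Pointer adjusted broadcasts (splats of a non-zero element of a loaded vector). Most fold into one broadcast from an offset address; load_splat_2f64_2f64_1111 does not.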
177 define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
178 ; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
179 ; CHECK: ## BB#0: ## %entry
180 ; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
183 %ld = load <16 x i8>, <16 x i8>* %ptr
184 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
188 define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
189 ; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
190 ; CHECK: ## BB#0: ## %entry
191 ; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
194 %ld = load <16 x i8>, <16 x i8>* %ptr
195 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
199 define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
200 ; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
201 ; CHECK: ## BB#0: ## %entry
202 ; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
205 %ld = load <32 x i8>, <32 x i8>* %ptr
206 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
210 define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
211 ; CHECK-LABEL: load_splat_8i16_8i16_11111111:
212 ; CHECK: ## BB#0: ## %entry
213 ; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
216 %ld = load <8 x i16>, <8 x i16>* %ptr
217 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
221 define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
222 ; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
223 ; CHECK: ## BB#0: ## %entry
224 ; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
227 %ld = load <8 x i16>, <8 x i16>* %ptr
228 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
232 define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
233 ; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
234 ; CHECK: ## BB#0: ## %entry
235 ; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
238 %ld = load <16 x i16>, <16 x i16>* %ptr
239 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
243 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
244 ; CHECK-LABEL: load_splat_4i32_4i32_1111:
245 ; CHECK: ## BB#0: ## %entry
246 ; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
249 %ld = load <4 x i32>, <4 x i32>* %ptr
250 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
254 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
255 ; CHECK-LABEL: load_splat_8i32_4i32_33333333:
256 ; CHECK: ## BB#0: ## %entry
257 ; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
260 %ld = load <4 x i32>, <4 x i32>* %ptr
261 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
265 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
266 ; CHECK-LABEL: load_splat_8i32_8i32_55555555:
267 ; CHECK: ## BB#0: ## %entry
268 ; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
271 %ld = load <8 x i32>, <8 x i32>* %ptr
272 %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
276 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
277 ; CHECK-LABEL: load_splat_4f32_4f32_1111:
278 ; CHECK: ## BB#0: ## %entry
279 ; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
282 %ld = load <4 x float>, <4 x float>* %ptr
283 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
287 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
288 ; CHECK-LABEL: load_splat_8f32_4f32_33333333:
289 ; CHECK: ## BB#0: ## %entry
290 ; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
293 %ld = load <4 x float>, <4 x float>* %ptr
294 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
298 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
299 ; CHECK-LABEL: load_splat_8f32_8f32_55555555:
300 ; CHECK: ## BB#0: ## %entry
301 ; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
304 %ld = load <8 x float>, <8 x float>* %ptr
305 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
309 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
310 ; CHECK-LABEL: load_splat_2i64_2i64_1111:
311 ; CHECK: ## BB#0: ## %entry
312 ; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
315 %ld = load <2 x i64>, <2 x i64>* %ptr
316 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
320 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
321 ; CHECK-LABEL: load_splat_4i64_2i64_1111:
322 ; CHECK: ## BB#0: ## %entry
323 ; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
326 %ld = load <2 x i64>, <2 x i64>* %ptr
327 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
331 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
332 ; CHECK-LABEL: load_splat_4i64_4i64_2222:
333 ; CHECK: ## BB#0: ## %entry
334 ; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
337 %ld = load <4 x i64>, <4 x i64>* %ptr
338 %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
342 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
343 ; CHECK-LABEL: load_splat_2f64_2f64_1111:
344 ; CHECK: ## BB#0: ## %entry
345 ; CHECK-NEXT: vmovaps (%rdi), %xmm0
346 ; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
349 %ld = load <2 x double>, <2 x double>* %ptr
350 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
351 ret <2 x double> %ret
354 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
355 ; CHECK-LABEL: load_splat_4f64_2f64_1111:
356 ; CHECK: ## BB#0: ## %entry
357 ; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
360 %ld = load <2 x double>, <2 x double>* %ptr
361 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
362 ret <4 x double> %ret
365 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
366 ; CHECK-LABEL: load_splat_4f64_4f64_2222:
367 ; CHECK: ## BB#0: ## %entry
368 ; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
371 %ld = load <4 x double>, <4 x double>* %ptr
372 %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
373 ret <4 x double> %ret
376 ; Make sure that we still don't support broadcasting a double into a 128-bit vector (vmovddup is expected instead).
378 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
380 ; CHECK: ## BB#0: ## %entry
381 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
384 %q = load double, double* %ptr, align 4
385 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
386 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
387 ret <2 x double> %vecinit2.i
390 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
392 ; CHECK: ## BB#0: ## %entry
393 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
394 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
397 %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
401 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
403 ; CHECK: ## BB#0: ## %entry
404 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
405 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
408 %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
412 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
415 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
417 %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
418 %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
419 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
420 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
421 ret <4 x float> %vecinit6.i
424 define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
427 ; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
429 %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
430 %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
431 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
432 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
433 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
434 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
435 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
436 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
437 ret <8 x i8> %vecinit7.i
441 define void @crash() nounwind alwaysinline {
442 ; CHECK-LABEL: crash:
443 ; CHECK: ## BB#0: ## %WGLoopsEntry
444 ; CHECK-NEXT: xorl %eax, %eax
445 ; CHECK-NEXT: testb %al, %al
446 ; CHECK-NEXT: je LBB31_1
447 ; CHECK-NEXT: ## BB#2: ## %ret
449 ; CHECK-NEXT: .align 4, 0x90
450 ; CHECK-NEXT: LBB31_1: ## %footer349VF
451 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
452 ; CHECK-NEXT: jmp LBB31_1
454 br i1 undef, label %ret, label %footer329VF
457 %A.0.inVF = fmul float undef, 6.553600e+04
458 %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
459 %A.0VF = fptosi float %A.0.inVF to i32
460 %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
461 %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
462 %1 = and i32 %A.0VF, 65535
463 %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
464 %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
465 br i1 undef, label %preload1201VF, label %footer349VF
468 br label %footer349VF
471 %2 = mul nsw <8 x i32> undef, %0
472 %3 = mul nsw <8 x i32> undef, %vector1099VF
473 br label %footer329VF
479 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
480 ; CHECK-LABEL: _inreg0:
482 ; CHECK-NEXT: vmovd %edi, %xmm0
483 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
485 %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
486 %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
490 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
491 ; CHECK-LABEL: _inreg1:
493 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
495 %in = insertelement <8 x float> undef, float %scalar, i32 0
496 %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
497 ret <8 x float> %wide
500 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
501 ; CHECK-LABEL: _inreg2:
503 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
505 %in = insertelement <4 x float> undef, float %scalar, i32 0
506 %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
507 ret <4 x float> %wide
510 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
511 ; CHECK-LABEL: _inreg3:
513 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
515 %in = insertelement <4 x double> undef, double %scalar, i32 0
516 %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
517 ret <4 x double> %wide
520 define <8 x float> @_inreg8xfloat(<8 x float> %a) {
521 ; CHECK-LABEL: _inreg8xfloat:
523 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
525 %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
529 define <4 x float> @_inreg4xfloat(<4 x float> %a) {
530 ; CHECK-LABEL: _inreg4xfloat:
532 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
534 %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
538 define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
539 ; CHECK-LABEL: _inreg16xi16:
541 ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
543 %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
547 define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
548 ; CHECK-LABEL: _inreg8xi16:
550 ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
552 %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
556 define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
557 ; CHECK-LABEL: _inreg4xi64:
559 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
561 %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
565 define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
566 ; CHECK-LABEL: _inreg2xi64:
568 ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
570 %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
574 define <4 x double> @_inreg4xdouble(<4 x double> %a) {
575 ; CHECK-LABEL: _inreg4xdouble:
577 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
579 %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
583 define <2 x double> @_inreg2xdouble(<2 x double> %a) {
584 ; CHECK-LABEL: _inreg2xdouble:
586 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
588 %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
592 define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
593 ; CHECK-LABEL: _inreg8xi32:
595 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
597 %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
601 define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
602 ; CHECK-LABEL: _inreg4xi32:
604 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
606 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
610 define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
611 ; CHECK-LABEL: _inreg32xi8:
613 ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
615 %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
619 define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
620 ; CHECK-LABEL: _inreg16xi8:
622 ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
624 %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
628 ; These tests check that a vbroadcast instruction is used when we have a splat
629 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
630 ; (via the insertelements).
632 define <8 x float> @splat_concat1(float %f) {
633 ; CHECK-LABEL: splat_concat1:
635 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
637 %1 = insertelement <4 x float> undef, float %f, i32 0
638 %2 = insertelement <4 x float> %1, float %f, i32 1
639 %3 = insertelement <4 x float> %2, float %f, i32 2
640 %4 = insertelement <4 x float> %3, float %f, i32 3
641 %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
645 define <8 x float> @splat_concat2(float %f) {
646 ; CHECK-LABEL: splat_concat2:
648 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
650 %1 = insertelement <4 x float> undef, float %f, i32 0
651 %2 = insertelement <4 x float> %1, float %f, i32 1
652 %3 = insertelement <4 x float> %2, float %f, i32 2
653 %4 = insertelement <4 x float> %3, float %f, i32 3
654 %5 = insertelement <4 x float> undef, float %f, i32 0
655 %6 = insertelement <4 x float> %5, float %f, i32 1
656 %7 = insertelement <4 x float> %6, float %f, i32 2
657 %8 = insertelement <4 x float> %7, float %f, i32 3
658 %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
662 define <4 x double> @splat_concat3(double %d) {
663 ; CHECK-LABEL: splat_concat3:
665 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
667 %1 = insertelement <2 x double> undef, double %d, i32 0
668 %2 = insertelement <2 x double> %1, double %d, i32 1
669 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
673 define <4 x double> @splat_concat4(double %d) {
674 ; CHECK-LABEL: splat_concat4:
676 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
678 %1 = insertelement <2 x double> undef, double %d, i32 0
679 %2 = insertelement <2 x double> %1, double %d, i32 1
680 %3 = insertelement <2 x double> undef, double %d, i32 0
681 %4 = insertelement <2 x double> %3, double %d, i32 1
682 %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
686 ; Test cases for <rdar://problem/16074331>.
687 ; Instruction selection for a broadcast instruction fails if
688 ; the load cannot be folded into the broadcast.
689 ; This happens if the load initially has one use but other uses are
690 ; created later, or if the selection DAG cannot prove that folding the
691 ; load will not create a cycle in the DAG.
692 ; These test cases exercise the latter.
694 ; CHECK-LABEL: isel_crash_16b
695 ; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
697 define void @isel_crash_16b(i8* %cV_R.addr) {
699 %__a.addr.i = alloca <2 x i64>, align 16
700 %__b.addr.i = alloca <2 x i64>, align 16
701 %vCr = alloca <2 x i64>, align 16
702 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
703 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
704 %tmp2 = load i8, i8* %cV_R.addr, align 4
705 %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
706 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
707 %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
708 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
709 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
713 ; CHECK-LABEL: isel_crash_32b
714 ; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
716 define void @isel_crash_32b(i8* %cV_R.addr) {
718 %__a.addr.i = alloca <4 x i64>, align 16
719 %__b.addr.i = alloca <4 x i64>, align 16
720 %vCr = alloca <4 x i64>, align 16
721 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
722 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
723 %tmp2 = load i8, i8* %cV_R.addr, align 4
724 %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
725 %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
726 %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
727 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
728 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
732 ; CHECK-LABEL: isel_crash_8w
733 ; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
735 define void @isel_crash_8w(i16* %cV_R.addr) {
737 %__a.addr.i = alloca <2 x i64>, align 16
738 %__b.addr.i = alloca <2 x i64>, align 16
739 %vCr = alloca <2 x i64>, align 16
740 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
741 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
742 %tmp2 = load i16, i16* %cV_R.addr, align 4
743 %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
744 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
745 %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
746 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
747 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
751 ; CHECK-LABEL: isel_crash_16w
752 ; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
754 define void @isel_crash_16w(i16* %cV_R.addr) {
756 %__a.addr.i = alloca <4 x i64>, align 16
757 %__b.addr.i = alloca <4 x i64>, align 16
758 %vCr = alloca <4 x i64>, align 16
759 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
760 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
761 %tmp2 = load i16, i16* %cV_R.addr, align 4
762 %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
763 %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
764 %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
765 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
766 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
770 ; CHECK-LABEL: isel_crash_4d
771 ; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
773 define void @isel_crash_4d(i32* %cV_R.addr) {
775 %__a.addr.i = alloca <2 x i64>, align 16
776 %__b.addr.i = alloca <2 x i64>, align 16
777 %vCr = alloca <2 x i64>, align 16
778 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
779 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
780 %tmp2 = load i32, i32* %cV_R.addr, align 4
781 %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
782 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
783 %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
784 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
785 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
789 ; CHECK-LABEL: isel_crash_8d
790 ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
792 define void @isel_crash_8d(i32* %cV_R.addr) {
794 %__a.addr.i = alloca <4 x i64>, align 16
795 %__b.addr.i = alloca <4 x i64>, align 16
796 %vCr = alloca <4 x i64>, align 16
797 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
798 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
799 %tmp2 = load i32, i32* %cV_R.addr, align 4
800 %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
801 %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
802 %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
803 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
804 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
808 ; CHECK-LABEL: isel_crash_2q
809 ; CHECK: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
811 define void @isel_crash_2q(i64* %cV_R.addr) {
813 %__a.addr.i = alloca <2 x i64>, align 16
814 %__b.addr.i = alloca <2 x i64>, align 16
815 %vCr = alloca <2 x i64>, align 16
816 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
817 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
818 %tmp2 = load i64, i64* %cV_R.addr, align 4
819 %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
820 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
821 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
822 store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
826 ; CHECK-LABEL: isel_crash_4q
827 ; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
829 define void @isel_crash_4q(i64* %cV_R.addr) {
831 %__a.addr.i = alloca <4 x i64>, align 16
832 %__b.addr.i = alloca <4 x i64>, align 16
833 %vCr = alloca <4 x i64>, align 16
834 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
835 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
836 %tmp2 = load i64, i64* %cV_R.addr, align 4
837 %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
838 %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
839 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
840 store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16