1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
5 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
7 ; X32: ## BB#0: ## %entry
8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
9 ; X32-NEXT: vpbroadcastb (%eax), %xmm0
13 ; X64: ## BB#0: ## %entry
14 ; X64-NEXT: vpbroadcastb (%rdi), %xmm0
17 %q = load i8, i8* %ptr, align 4
18 %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
19 %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
20 %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
21 %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
22 %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
23 %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
24 %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
25 %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
26 %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
27 %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
28 %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
29 %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
30 %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
31 %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
32 %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
33 %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
37 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
39 ; X32: ## BB#0: ## %entry
40 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
41 ; X32-NEXT: vpbroadcastb (%eax), %ymm0
45 ; X64: ## BB#0: ## %entry
46 ; X64-NEXT: vpbroadcastb (%rdi), %ymm0
49 %q = load i8, i8* %ptr, align 4
50 %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
51 %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
52 %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
53 %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
54 %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
55 %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
56 %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
57 %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
58 %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
59 %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
60 %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
61 %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
62 %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
63 %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
64 %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
65 %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
67 %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
68 %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
69 %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
70 %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
71 %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
72 %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
73 %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
74 %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
75 %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
76 %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
77 %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
78 %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
79 %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
80 %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
81 %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
82 %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
86 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
88 ; X32: ## BB#0: ## %entry
89 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
90 ; X32-NEXT: vpbroadcastw (%eax), %xmm0
94 ; X64: ## BB#0: ## %entry
95 ; X64-NEXT: vpbroadcastw (%rdi), %xmm0
98 %q = load i16, i16* %ptr, align 4
99 %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
100 %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
101 %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
102 %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
103 %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
104 %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
105 %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
106 %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
110 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
112 ; X32: ## BB#0: ## %entry
113 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
114 ; X32-NEXT: vpbroadcastw (%eax), %ymm0
118 ; X64: ## BB#0: ## %entry
119 ; X64-NEXT: vpbroadcastw (%rdi), %ymm0
122 %q = load i16, i16* %ptr, align 4
123 %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
124 %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
125 %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
126 %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
127 %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
128 %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
129 %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
130 %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
131 %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
132 %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
133 %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
134 %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
135 %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
136 %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
137 %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
138 %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
142 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
144 ; X32: ## BB#0: ## %entry
145 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
146 ; X32-NEXT: vbroadcastss (%eax), %xmm0
150 ; X64: ## BB#0: ## %entry
151 ; X64-NEXT: vbroadcastss (%rdi), %xmm0
154 %q = load i32, i32* %ptr, align 4
155 %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
156 %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
157 %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
158 %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
162 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
164 ; X32: ## BB#0: ## %entry
165 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
166 ; X32-NEXT: vbroadcastss (%eax), %ymm0
170 ; X64: ## BB#0: ## %entry
171 ; X64-NEXT: vbroadcastss (%rdi), %ymm0
174 %q = load i32, i32* %ptr, align 4
175 %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
176 %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
177 %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
178 %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
179 %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
180 %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
181 %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
182 %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
186 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
188 ; X32: ## BB#0: ## %entry
189 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
190 ; X32-NEXT: movl (%eax), %ecx
191 ; X32-NEXT: movl 4(%eax), %eax
192 ; X32-NEXT: vmovd %ecx, %xmm0
193 ; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
194 ; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
195 ; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
199 ; X64: ## BB#0: ## %entry
200 ; X64-NEXT: vpbroadcastq (%rdi), %xmm0
203 %q = load i64, i64* %ptr, align 4
204 %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
205 %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
209 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
211 ; X32: ## BB#0: ## %entry
212 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
213 ; X32-NEXT: movl (%eax), %ecx
214 ; X32-NEXT: movl 4(%eax), %eax
215 ; X32-NEXT: vmovd %ecx, %xmm0
216 ; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
217 ; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
218 ; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
219 ; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
223 ; X64: ## BB#0: ## %entry
224 ; X64-NEXT: vbroadcastsd (%rdi), %ymm0
227 %q = load i64, i64* %ptr, align 4
228 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
229 %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
230 %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
231 %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
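235 ; FIXME: Pointer-adjusted broadcasts - not every splat-of-loaded-element below folds the offset load into the broadcast yet (e.g. the 2f64 case still loads the whole vector).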
237 define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
238 ; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
239 ; X32: ## BB#0: ## %entry
240 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; X32-NEXT: vpbroadcastb 1(%eax), %xmm0
244 ; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
245 ; X64: ## BB#0: ## %entry
246 ; X64-NEXT: vpbroadcastb 1(%rdi), %xmm0
249 %ld = load <16 x i8>, <16 x i8>* %ptr
250 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
254 define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
255 ; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
256 ; X32: ## BB#0: ## %entry
257 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
258 ; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
261 ; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
262 ; X64: ## BB#0: ## %entry
263 ; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
266 %ld = load <16 x i8>, <16 x i8>* %ptr
267 %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
271 define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
272 ; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
273 ; X32: ## BB#0: ## %entry
274 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
275 ; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
278 ; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
279 ; X64: ## BB#0: ## %entry
280 ; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
283 %ld = load <32 x i8>, <32 x i8>* %ptr
284 %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
288 define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
289 ; X32-LABEL: load_splat_8i16_8i16_11111111:
290 ; X32: ## BB#0: ## %entry
291 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
292 ; X32-NEXT: vpbroadcastw 2(%eax), %xmm0
295 ; X64-LABEL: load_splat_8i16_8i16_11111111:
296 ; X64: ## BB#0: ## %entry
297 ; X64-NEXT: vpbroadcastw 2(%rdi), %xmm0
300 %ld = load <8 x i16>, <8 x i16>* %ptr
301 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
305 define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
306 ; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
307 ; X32: ## BB#0: ## %entry
308 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
309 ; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
312 ; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
313 ; X64: ## BB#0: ## %entry
314 ; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
317 %ld = load <8 x i16>, <8 x i16>* %ptr
318 %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
322 define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
323 ; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
324 ; X32: ## BB#0: ## %entry
325 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
326 ; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
329 ; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
330 ; X64: ## BB#0: ## %entry
331 ; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
334 %ld = load <16 x i16>, <16 x i16>* %ptr
335 %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
339 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
340 ; X32-LABEL: load_splat_4i32_4i32_1111:
341 ; X32: ## BB#0: ## %entry
342 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
343 ; X32-NEXT: vbroadcastss 4(%eax), %xmm0
346 ; X64-LABEL: load_splat_4i32_4i32_1111:
347 ; X64: ## BB#0: ## %entry
348 ; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
351 %ld = load <4 x i32>, <4 x i32>* %ptr
352 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
356 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
357 ; X32-LABEL: load_splat_8i32_4i32_33333333:
358 ; X32: ## BB#0: ## %entry
359 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
360 ; X32-NEXT: vbroadcastss 12(%eax), %ymm0
363 ; X64-LABEL: load_splat_8i32_4i32_33333333:
364 ; X64: ## BB#0: ## %entry
365 ; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
368 %ld = load <4 x i32>, <4 x i32>* %ptr
369 %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
373 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
374 ; X32-LABEL: load_splat_8i32_8i32_55555555:
375 ; X32: ## BB#0: ## %entry
376 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
377 ; X32-NEXT: vbroadcastss 20(%eax), %ymm0
380 ; X64-LABEL: load_splat_8i32_8i32_55555555:
381 ; X64: ## BB#0: ## %entry
382 ; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
385 %ld = load <8 x i32>, <8 x i32>* %ptr
386 %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
390 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
391 ; X32-LABEL: load_splat_4f32_4f32_1111:
392 ; X32: ## BB#0: ## %entry
393 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; X32-NEXT: vbroadcastss 4(%eax), %xmm0
397 ; X64-LABEL: load_splat_4f32_4f32_1111:
398 ; X64: ## BB#0: ## %entry
399 ; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
402 %ld = load <4 x float>, <4 x float>* %ptr
403 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
407 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
408 ; X32-LABEL: load_splat_8f32_4f32_33333333:
409 ; X32: ## BB#0: ## %entry
410 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
411 ; X32-NEXT: vbroadcastss 12(%eax), %ymm0
414 ; X64-LABEL: load_splat_8f32_4f32_33333333:
415 ; X64: ## BB#0: ## %entry
416 ; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
419 %ld = load <4 x float>, <4 x float>* %ptr
420 %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
424 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
425 ; X32-LABEL: load_splat_8f32_8f32_55555555:
426 ; X32: ## BB#0: ## %entry
427 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
428 ; X32-NEXT: vbroadcastss 20(%eax), %ymm0
431 ; X64-LABEL: load_splat_8f32_8f32_55555555:
432 ; X64: ## BB#0: ## %entry
433 ; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
436 %ld = load <8 x float>, <8 x float>* %ptr
437 %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
441 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
442 ; X32-LABEL: load_splat_2i64_2i64_1111:
443 ; X32: ## BB#0: ## %entry
444 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
445 ; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
446 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
449 ; X64-LABEL: load_splat_2i64_2i64_1111:
450 ; X64: ## BB#0: ## %entry
451 ; X64-NEXT: vpbroadcastq 8(%rdi), %xmm0
454 %ld = load <2 x i64>, <2 x i64>* %ptr
455 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
459 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
460 ; X32-LABEL: load_splat_4i64_2i64_1111:
461 ; X32: ## BB#0: ## %entry
462 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
463 ; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
466 ; X64-LABEL: load_splat_4i64_2i64_1111:
467 ; X64: ## BB#0: ## %entry
468 ; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
471 %ld = load <2 x i64>, <2 x i64>* %ptr
472 %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
476 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
477 ; X32-LABEL: load_splat_4i64_4i64_2222:
478 ; X32: ## BB#0: ## %entry
479 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
480 ; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
483 ; X64-LABEL: load_splat_4i64_4i64_2222:
484 ; X64: ## BB#0: ## %entry
485 ; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
488 %ld = load <4 x i64>, <4 x i64>* %ptr
489 %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
493 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
494 ; X32-LABEL: load_splat_2f64_2f64_1111:
495 ; X32: ## BB#0: ## %entry
496 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
497 ; X32-NEXT: vmovaps (%eax), %xmm0
498 ; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
501 ; X64-LABEL: load_splat_2f64_2f64_1111:
502 ; X64: ## BB#0: ## %entry
503 ; X64-NEXT: vmovaps (%rdi), %xmm0
504 ; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
507 %ld = load <2 x double>, <2 x double>* %ptr
508 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
509 ret <2 x double> %ret
512 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
513 ; X32-LABEL: load_splat_4f64_2f64_1111:
514 ; X32: ## BB#0: ## %entry
515 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
516 ; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
519 ; X64-LABEL: load_splat_4f64_2f64_1111:
520 ; X64: ## BB#0: ## %entry
521 ; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
524 %ld = load <2 x double>, <2 x double>* %ptr
525 %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
526 ret <4 x double> %ret
529 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
530 ; X32-LABEL: load_splat_4f64_4f64_2222:
531 ; X32: ## BB#0: ## %entry
532 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
533 ; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
536 ; X64-LABEL: load_splat_4f64_4f64_2222:
537 ; X64: ## BB#0: ## %entry
538 ; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
541 %ld = load <4 x double>, <4 x double>* %ptr
542 %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
543 ret <4 x double> %ret
546 ; Make sure that we still do not use a broadcast instruction to splat a double into a 128-bit vector (vmovddup is expected instead).
548 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
550 ; X32: ## BB#0: ## %entry
551 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
552 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
556 ; X64: ## BB#0: ## %entry
557 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
560 %q = load double, double* %ptr, align 4
561 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
562 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
563 ret <2 x double> %vecinit2.i
566 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
568 ; X32: ## BB#0: ## %entry
569 ; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
570 ; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
574 ; X64: ## BB#0: ## %entry
575 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
576 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
579 %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
583 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
585 ; X32: ## BB#0: ## %entry
586 ; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
587 ; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
591 ; X64: ## BB#0: ## %entry
592 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
593 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
596 %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
600 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
603 ; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
608 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
610 %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
611 %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
612 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
613 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
614 ret <4 x float> %vecinit6.i
617 define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
620 ; X32-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
625 ; X64-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
627 %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
628 %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
629 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
630 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
631 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
632 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
633 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
634 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
635 ret <8 x i8> %vecinit7.i
638 define void @crash() nounwind alwaysinline {
640 ; X32: ## BB#0: ## %WGLoopsEntry
641 ; X32-NEXT: xorl %eax, %eax
642 ; X32-NEXT: testb %al, %al
643 ; X32-NEXT: je LBB31_1
644 ; X32-NEXT: ## BB#2: ## %ret
646 ; X32-NEXT: .align 4, 0x90
647 ; X32-NEXT: LBB31_1: ## %footer349VF
648 ; X32-NEXT: ## =>This Inner Loop Header: Depth=1
649 ; X32-NEXT: jmp LBB31_1
652 ; X64: ## BB#0: ## %WGLoopsEntry
653 ; X64-NEXT: xorl %eax, %eax
654 ; X64-NEXT: testb %al, %al
655 ; X64-NEXT: je LBB31_1
656 ; X64-NEXT: ## BB#2: ## %ret
658 ; X64-NEXT: .align 4, 0x90
659 ; X64-NEXT: LBB31_1: ## %footer349VF
660 ; X64-NEXT: ## =>This Inner Loop Header: Depth=1
661 ; X64-NEXT: jmp LBB31_1
663 br i1 undef, label %ret, label %footer329VF
666 %A.0.inVF = fmul float undef, 6.553600e+04
667 %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
668 %A.0VF = fptosi float %A.0.inVF to i32
669 %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
670 %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
671 %1 = and i32 %A.0VF, 65535
672 %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
673 %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
674 br i1 undef, label %preload1201VF, label %footer349VF
677 br label %footer349VF
680 %2 = mul nsw <8 x i32> undef, %0
681 %3 = mul nsw <8 x i32> undef, %vector1099VF
682 br label %footer329VF
688 define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
689 ; X32-LABEL: _inreg0:
691 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
694 ; X64-LABEL: _inreg0:
696 ; X64-NEXT: vmovd %edi, %xmm0
697 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
699 %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
700 %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
704 define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
705 ; X32-LABEL: _inreg1:
707 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
710 ; X64-LABEL: _inreg1:
712 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
714 %in = insertelement <8 x float> undef, float %scalar, i32 0
715 %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
716 ret <8 x float> %wide
719 define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
720 ; X32-LABEL: _inreg2:
722 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
725 ; X64-LABEL: _inreg2:
727 ; X64-NEXT: vbroadcastss %xmm0, %xmm0
729 %in = insertelement <4 x float> undef, float %scalar, i32 0
730 %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
731 ret <4 x float> %wide
734 define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
735 ; X32-LABEL: _inreg3:
737 ; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
740 ; X64-LABEL: _inreg3:
742 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
744 %in = insertelement <4 x double> undef, double %scalar, i32 0
745 %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
746 ret <4 x double> %wide
749 define <8 x float> @_inreg8xfloat(<8 x float> %a) {
750 ; X32-LABEL: _inreg8xfloat:
752 ; X32-NEXT: vbroadcastss %xmm0, %ymm0
755 ; X64-LABEL: _inreg8xfloat:
757 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
759 %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
763 define <4 x float> @_inreg4xfloat(<4 x float> %a) {
764 ; X32-LABEL: _inreg4xfloat:
766 ; X32-NEXT: vbroadcastss %xmm0, %xmm0
769 ; X64-LABEL: _inreg4xfloat:
771 ; X64-NEXT: vbroadcastss %xmm0, %xmm0
773 %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
777 define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
778 ; X32-LABEL: _inreg16xi16:
780 ; X32-NEXT: vpbroadcastw %xmm0, %ymm0
783 ; X64-LABEL: _inreg16xi16:
785 ; X64-NEXT: vpbroadcastw %xmm0, %ymm0
787 %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
791 define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
792 ; X32-LABEL: _inreg8xi16:
794 ; X32-NEXT: vpbroadcastw %xmm0, %xmm0
797 ; X64-LABEL: _inreg8xi16:
799 ; X64-NEXT: vpbroadcastw %xmm0, %xmm0
801 %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
805 define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
806 ; X32-LABEL: _inreg4xi64:
808 ; X32-NEXT: vbroadcastsd %xmm0, %ymm0
811 ; X64-LABEL: _inreg4xi64:
813 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
815 %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
819 define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
820 ; X32-LABEL: _inreg2xi64:
822 ; X32-NEXT: vpbroadcastq %xmm0, %xmm0
825 ; X64-LABEL: _inreg2xi64:
827 ; X64-NEXT: vpbroadcastq %xmm0, %xmm0
829 %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
833 define <4 x double> @_inreg4xdouble(<4 x double> %a) {
834 ; X32-LABEL: _inreg4xdouble:
836 ; X32-NEXT: vbroadcastsd %xmm0, %ymm0
839 ; X64-LABEL: _inreg4xdouble:
841 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
843 %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
847 define <2 x double> @_inreg2xdouble(<2 x double> %a) {
848 ; X32-LABEL: _inreg2xdouble:
850 ; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
853 ; X64-LABEL: _inreg2xdouble:
855 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
857 %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
861 define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
862 ; X32-LABEL: _inreg8xi32:
864 ; X32-NEXT: vbroadcastss %xmm0, %ymm0
867 ; X64-LABEL: _inreg8xi32:
869 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
871 %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
875 define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
876 ; X32-LABEL: _inreg4xi32:
878 ; X32-NEXT: vbroadcastss %xmm0, %xmm0
881 ; X64-LABEL: _inreg4xi32:
883 ; X64-NEXT: vbroadcastss %xmm0, %xmm0
885 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
889 define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
890 ; X32-LABEL: _inreg32xi8:
892 ; X32-NEXT: vpbroadcastb %xmm0, %ymm0
895 ; X64-LABEL: _inreg32xi8:
897 ; X64-NEXT: vpbroadcastb %xmm0, %ymm0
899 %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
903 define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
904 ; X32-LABEL: _inreg16xi8:
906 ; X32-NEXT: vpbroadcastb %xmm0, %xmm0
909 ; X64-LABEL: _inreg16xi8:
911 ; X64-NEXT: vpbroadcastb %xmm0, %xmm0
913 %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
917 ; These tests check that a vbroadcast instruction is used when we have a splat
918 ; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
919 ; (via the insertelements).
921 define <8 x float> @splat_concat1(float %f) {
922 ; X32-LABEL: splat_concat1:
924 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
927 ; X64-LABEL: splat_concat1:
929 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
931 %1 = insertelement <4 x float> undef, float %f, i32 0
932 %2 = insertelement <4 x float> %1, float %f, i32 1
933 %3 = insertelement <4 x float> %2, float %f, i32 2
934 %4 = insertelement <4 x float> %3, float %f, i32 3
935 %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
939 define <8 x float> @splat_concat2(float %f) {
940 ; X32-LABEL: splat_concat2:
942 ; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
945 ; X64-LABEL: splat_concat2:
947 ; X64-NEXT: vbroadcastss %xmm0, %ymm0
949 %1 = insertelement <4 x float> undef, float %f, i32 0
950 %2 = insertelement <4 x float> %1, float %f, i32 1
951 %3 = insertelement <4 x float> %2, float %f, i32 2
952 %4 = insertelement <4 x float> %3, float %f, i32 3
953 %5 = insertelement <4 x float> undef, float %f, i32 0
954 %6 = insertelement <4 x float> %5, float %f, i32 1
955 %7 = insertelement <4 x float> %6, float %f, i32 2
956 %8 = insertelement <4 x float> %7, float %f, i32 3
957 %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
961 define <4 x double> @splat_concat3(double %d) {
962 ; X32-LABEL: splat_concat3:
964 ; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
967 ; X64-LABEL: splat_concat3:
969 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
971 %1 = insertelement <2 x double> undef, double %d, i32 0
972 %2 = insertelement <2 x double> %1, double %d, i32 1
973 %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
977 define <4 x double> @splat_concat4(double %d) {
978 ; X32-LABEL: splat_concat4:
980 ; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
983 ; X64-LABEL: splat_concat4:
985 ; X64-NEXT: vbroadcastsd %xmm0, %ymm0
987 %1 = insertelement <2 x double> undef, double %d, i32 0
988 %2 = insertelement <2 x double> %1, double %d, i32 1
989 %3 = insertelement <2 x double> undef, double %d, i32 0
990 %4 = insertelement <2 x double> %3, double %d, i32 1
991 %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
995 ; Test cases for <rdar://problem/16074331>.
996 ; Instruction selection for broadcast instructions fails if
997 ; the load cannot be folded into the broadcast.
998 ; This happens if the load initially has one use but other uses are
999 ; created later, or if the selection DAG cannot prove that folding the
1000 ; load will not create a cycle in the DAG.
1001 ; These test cases exercise the latter.
1003 ; CHECK-LABEL: isel_crash_16b
1004 ; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
1006 define void @isel_crash_16b(i8* %cV_R.addr) {
1008 %__a.addr.i = alloca <2 x i64>, align 16
1009 %__b.addr.i = alloca <2 x i64>, align 16
1010 %vCr = alloca <2 x i64>, align 16
1011 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
1012 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
1013 %tmp2 = load i8, i8* %cV_R.addr, align 4
1014 %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
1015 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
1016 %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
1017 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
1018 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; Splats an i8 loaded from %cV_R.addr across a <32 x i8> (insertelement into
; lane 0 + zero-mask shufflevector), bitcasts it to <4 x i64> and stores it to
; a stack slot, next to a zero vector that is stored and reloaded via %vCr.
; The assertion only requires that a vpbroadcastb with a ymm destination is
; emitted; the source operand is left unconstrained.
1022 ; CHECK-LABEL: isel_crash_32b
1023 ; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
1025 define void @isel_crash_32b(i8* %cV_R.addr) {
1027 %__a.addr.i = alloca <4 x i64>, align 16
1028 %__b.addr.i = alloca <4 x i64>, align 16
1029 %vCr = alloca <4 x i64>, align 16
1030 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
1031 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
1032 %tmp2 = load i8, i8* %cV_R.addr, align 4
1033 %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
1034 %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
1035 %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
1036 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
1037 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; Splats an i16 loaded from %cV_R.addr across an <8 x i16> (insertelement into
; lane 0 + zero-mask shufflevector), bitcasts it to <2 x i64> and stores it to
; a stack slot, next to a zero vector that is stored and reloaded via %vCr.
; The assertion only requires that a vpbroadcastw with an xmm destination is
; emitted; the source operand is left unconstrained.
1041 ; CHECK-LABEL: isel_crash_8w
1042 ; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
1044 define void @isel_crash_8w(i16* %cV_R.addr) {
1046 %__a.addr.i = alloca <2 x i64>, align 16
1047 %__b.addr.i = alloca <2 x i64>, align 16
1048 %vCr = alloca <2 x i64>, align 16
1049 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
1050 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
1051 %tmp2 = load i16, i16* %cV_R.addr, align 4
1052 %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
1053 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
1054 %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
1055 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
1056 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; Splats an i16 loaded from %cV_R.addr across a <16 x i16> (insertelement into
; lane 0 + zero-mask shufflevector), bitcasts it to <4 x i64> and stores it to
; a stack slot, next to a zero vector that is stored and reloaded via %vCr.
; The assertion only requires that a vpbroadcastw with a ymm destination is
; emitted; the source operand is left unconstrained.
1060 ; CHECK-LABEL: isel_crash_16w
1061 ; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
1063 define void @isel_crash_16w(i16* %cV_R.addr) {
1065 %__a.addr.i = alloca <4 x i64>, align 16
1066 %__b.addr.i = alloca <4 x i64>, align 16
1067 %vCr = alloca <4 x i64>, align 16
1068 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
1069 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
1070 %tmp2 = load i16, i16* %cV_R.addr, align 4
1071 %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
1072 %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
1073 %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
1074 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
1075 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; Splats an i32 loaded from %cV_R.addr across a <4 x i32> (insertelement into
; lane 0 + zero-mask shufflevector), bitcasts it to <2 x i64> and stores it to
; a stack slot, next to a zero vector that is stored and reloaded via %vCr.
; Although the element type is an integer, the expected instruction is
; vbroadcastss with an xmm destination; the source operand is unconstrained.
1079 ; CHECK-LABEL: isel_crash_4d
1080 ; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
1082 define void @isel_crash_4d(i32* %cV_R.addr) {
1084 %__a.addr.i = alloca <2 x i64>, align 16
1085 %__b.addr.i = alloca <2 x i64>, align 16
1086 %vCr = alloca <2 x i64>, align 16
1087 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
1088 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
1089 %tmp2 = load i32, i32* %cV_R.addr, align 4
1090 %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
1091 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1092 %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
1093 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
1094 store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
; Splats an i32 loaded from %cV_R.addr across an <8 x i32> (insertelement into
; lane 0 + zero-mask shufflevector), bitcasts it to <4 x i64> and stores it to
; a stack slot, next to a zero vector that is stored and reloaded via %vCr.
; Although the element type is an integer, the expected instruction is
; vbroadcastss with a ymm destination; the source operand is unconstrained.
1098 ; CHECK-LABEL: isel_crash_8d
1099 ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
1101 define void @isel_crash_8d(i32* %cV_R.addr) {
1103 %__a.addr.i = alloca <4 x i64>, align 16
1104 %__b.addr.i = alloca <4 x i64>, align 16
1105 %vCr = alloca <4 x i64>, align 16
1106 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
1107 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
1108 %tmp2 = load i32, i32* %cV_R.addr, align 4
1109 %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
1110 %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
1111 %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
1112 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
1113 store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
; Splats an i64 loaded from %cV_R.addr across a <2 x i64> (insertelement into
; lane 0 + zero-mask shufflevector) and stores it to a stack slot, next to a
; zero vector that is stored and reloaded via %vCr. No bitcast is needed since
; the splat already has the stored type.
; Only the x86_64 run (X64 prefix) carries assertions for this function: it
; expects a vpbroadcastq with an xmm destination and any source operand.
1117 ; X64-LABEL: isel_crash_2q
1118 ; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
1120 define void @isel_crash_2q(i64* %cV_R.addr) {
1122 %__a.addr.i = alloca <2 x i64>, align 16
1123 %__b.addr.i = alloca <2 x i64>, align 16
1124 %vCr = alloca <2 x i64>, align 16
1125 store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
1126 %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
1127 %tmp2 = load i64, i64* %cV_R.addr, align 4
1128 %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
1129 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
1130 store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
1131 store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
; Splats an i64 loaded from %cV_R.addr across a <4 x i64> (insertelement into
; lane 0 + zero-mask shufflevector) and stores it to a stack slot, next to a
; zero vector that is stored and reloaded via %vCr.
; Only the x86_64 run (X64 prefix) carries assertions for this function. The
; expected instruction is vbroadcastsd (not vpbroadcastq) with a ymm
; destination; the source operand is left unconstrained.
1135 ; X64-LABEL: isel_crash_4q
1136 ; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
1138 define void @isel_crash_4q(i64* %cV_R.addr) {
1140 %__a.addr.i = alloca <4 x i64>, align 16
1141 %__b.addr.i = alloca <4 x i64>, align 16
1142 %vCr = alloca <4 x i64>, align 16
1143 store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
1144 %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
1145 %tmp2 = load i64, i64* %cV_R.addr, align 4
1146 %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
1147 %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
1148 store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
1149 store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16