1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
4 ; CHECK: vpunpcklbw %xmm
5 ; CHECK-NEXT: vpunpckhbw %xmm
6 ; CHECK-NEXT: vpshufd $85
7 ; CHECK-NEXT: vinsertf128 $1
; funcA: byte splat of a 256-bit vector.
; Every one of the 32 result lanes selects element 5 of %a (the mask is all 5s),
; so the result is that single byte replicated across the whole <32 x i8>.
8 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; The second shuffle operand is undef; the mask only references indices < 32,
; i.e. only lanes of %a.
10 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
11 ret <32 x i8> %shuffle
14 ; CHECK: vpunpckhwd %xmm
15 ; CHECK-NEXT: vpshufd $85
16 ; CHECK-NEXT: vinsertf128 $1
; funcB: 16-bit splat of a 256-bit vector.
; All 16 result lanes select element 5 of %a, replicating that i16 across the
; whole <16 x i16>.
17 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; Second operand is undef; the all-5 mask reads only from %a.
19 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
20 ret <16 x i16> %shuffle
24 ; CHECK-NEXT: vmovlhps %xmm
25 ; CHECK-NEXT: vinsertf128 $1
; funcC: splat of a scalar i64 built with an insertelement chain.
; %q is written into lanes 0, 1, 2 and 3 in turn, so the returned <4 x i64>
; holds %q in every lane.
26 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; Start from undef and fill one lane per instruction.
28 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
29 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
30 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
31 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
32 ret <4 x i64> %vecinit6.i
36 ; CHECK-NEXT: vinsertf128 $1
; funcD: splat of a scalar double built with an insertelement chain.
; Same shape as the i64 variant: %q is written into lanes 0 through 3, so the
; returned <4 x double> holds %q in every lane.
37 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; Start from undef and fill one lane per instruction.
39 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
40 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
41 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
42 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
43 ret <4 x double> %vecinit6.i
46 ; Test this turns into a broadcast:
47 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; funcE: a value loaded from a stack array is placed in lanes 6 and 7 of an
; otherwise-undef <8 x i32>, reinterpreted as <8 x float>, and merged with undef
; through a phi before being returned.
; NOTE(review): the branch conditions are undef, so this looks like a reduced
; reproducer rather than meaningful control flow - confirm against the
; original test.
50 define <8 x float> @funcE() nounwind {
; 18x18 float array on the stack, 32-byte aligned.
; NOTE(review): the preds comment below names an %allocas block; its label is
; not visible in this excerpt.
52 %udx495 = alloca [18 x [18 x float]], align 32
53 br label %for_test505.preheader

; Self-looping block whose exit condition is undef.
55 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
56 br i1 undef, label %for_exit499, label %for_test505.preheader

; Either skip the load (phi then yields undef) or fall into the load block.
58 for_exit499: ; preds = %for_test505.preheader
59 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

61 load.i1247: ; preds = %for_exit499
; Address of element [1][1] of the array, reinterpreted as an i32 pointer so
; the 32-bit value is loaded as an integer.
62 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
63 %ptr.i1237 = bitcast float* %ptr1227 to i32*
64 %val.i1238 = load i32* %ptr.i1237, align 4
; Only lanes 6 and 7 are defined; lanes 0-5 stay undef.
65 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
66 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
67 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
68 br label %__load_and_broadcast_32.exit1249

; Join point: the loaded vector from load.i1247, or undef when the load was
; skipped.
70 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
71 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
72 ret <8 x float> %load_broadcast12281250
76 ; CHECK-NEXT: vinsertf128 $1
; funcF: scalar i32 argument placed in lanes 6 and 7 of an otherwise-undef
; <8 x i32>, then reinterpreted as <8 x float>.
; NOTE(review): the terminator of this function is not visible in this
; excerpt; presumably %tmp is returned - confirm against the full file.
77 define <8 x float> @funcF(i32 %val) nounwind {
; Lanes 0-5 remain undef; only the top two lanes carry %val.
78 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
79 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
; Bit-for-bit reinterpretation, no value conversion.
80 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
85 ; CHECK-NEXT: vinsertf128 $1
; funcG: float splat of element 0.
; All 8 result lanes select element 0 of %a, which sits in the lower 128-bit
; half of the <8 x float>.
86 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; Second operand is undef; the all-zero mask reads only lane 0 of %a.
88 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
89 ret <8 x float> %shuffle
92 ; CHECK: vextractf128 $1
94 ; CHECK-NEXT: vinsertf128 $1
; funcH: float splat of element 5.
; All 8 result lanes select element 5 of %a. Element 5 lies in the upper
; 128-bit half (elements 4-7), which is why the expected output for this
; function begins with an extract of the high half.
95 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; Second operand is undef; the all-5 mask reads only lane 5 of %a.
97 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
98 ret <8 x float> %shuffle