1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
4 ; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
5 ; CHECK-NEXT: vinsertf128 $1
; funcA: byte splat of a 256-bit vector.
; Input:  %a, a <32 x i8> vector.
; The shufflevector below uses a <32 x i32> mask whose entries are all 5, so
; every lane of %shuffle is a copy of element 5 of %a. The second shuffle
; operand is undef and no mask index selects from it.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %shuffle is the returned value -- confirm against the full file.
6 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
8 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
12 ; CHECK: vpunpckhwd %xmm
13 ; CHECK-NEXT: vpshufd $85
14 ; CHECK-NEXT: vinsertf128 $1
; funcB: 16-bit element splat of a 256-bit vector.
; Input:   %a, a <16 x i16> vector.
; Returns: a <16 x i16> vector in which every lane is element 5 of %a.
15 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; All sixteen mask entries are 5; the undef second operand is never selected.
17 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
18 ret <16 x i16> %shuffle
22 ; CHECK-NEXT: vmovlhps %xmm
23 ; CHECK-NEXT: vinsertf128 $1
; funcC: build a <4 x i64> splat from a scalar.
; Input:   %q, an i64 scalar.
; Returns: a <4 x i64> vector with %q in each of lanes 0 through 3.
24 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; Start from undef and insert the same scalar into one lane per step, so the
; final value has every lane defined and equal to %q.
26 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
27 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
28 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
29 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
30 ret <4 x i64> %vecinit6.i
33 ; CHECK: vunpcklpd %xmm
34 ; CHECK-NEXT: vinsertf128 $1
; funcD: build a <4 x double> splat from a scalar.
; Input:   %q, a double scalar.
; Returns: a <4 x double> vector with %q in each of lanes 0 through 3.
35 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; Same insertelement chain as the i64 variant, but on double elements: each
; step fills one more lane of an initially undef vector with %q.
37 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
38 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
39 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
40 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
41 ret <4 x double> %vecinit6.i
44 ; Test this turns into a broadcast:
45 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; funcE: takes no arguments and returns an <8 x float>.
; On the path through load.i1247 the result carries one 32-bit value loaded
; from a stack array in lanes 6 and 7 (all other lanes undef); on the other
; path the result is entirely undef. Both conditional branches use an undef
; condition, so the path taken is not fixed by the IR.
; NOTE(review): the entry block label is not visible in this excerpt; the
; preds annotation below refers to it as %allocas.
48 define <8 x float> @funcE() nounwind {
; 18x18 float array on the stack, 32-byte aligned.
50 %udx495 = alloca [18 x [18 x float]], align 32
51 br label %for_test505.preheader
; Loop header that branches back to itself or exits on an undef condition.
53 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
54 br i1 undef, label %for_exit499, label %for_test505.preheader
; Either skip straight to the merge block or go through the load block.
56 for_exit499: ; preds = %for_test505.preheader
57 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
59 load.i1247: ; preds = %for_exit499
; Address of element [1][1] of the array, reinterpreted as an i32 pointer so
; the float's bits are loaded as an integer.
60 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
61 %ptr.i1237 = bitcast float* %ptr1227 to i32*
62 %val.i1238 = load i32* %ptr.i1237, align 4
; Place the loaded value in lanes 6 and 7 only; lanes 0-5 stay undef.
63 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
64 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
; Reinterpret the integer vector as floats without changing any bits.
65 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
66 br label %__load_and_broadcast_32.exit1249
68 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
; Merge: the vector built above when arriving from load.i1247, undef otherwise.
69 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
70 ret <8 x float> %load_broadcast12281250
74 ; CHECK-NEXT: vinsertf128 $1
; funcF: partial splat of an i32 scalar, viewed as floats.
; Input: %val, an i32 scalar.
; %val is inserted into lanes 6 and 7 of an otherwise undef <8 x i32> vector,
; which is then bitcast (bits unchanged) to <8 x float>.
; NOTE(review): the return instruction is not visible in this excerpt;
; presumably %tmp is the returned value -- confirm against the full file.
75 define <8 x float> @funcF(i32 %val) nounwind {
76 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
77 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
78 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
83 ; CHECK-NEXT: vinsertf128 $1
; funcG: float splat of element 0.
; Input:   %a, an <8 x float> vector.
; Returns: an <8 x float> vector in which every lane is element 0 of %a.
84 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; All eight mask entries are 0; the undef second operand is never selected.
86 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
87 ret <8 x float> %shuffle
90 ; CHECK: vextractf128 $1
92 ; CHECK-NEXT: vinsertf128 $1
; funcH: float splat of element 5.
; Input:   %a, an <8 x float> vector.
; Returns: an <8 x float> vector in which every lane is element 5 of %a.
; Element 5 is one of the upper four lanes (indices 4-7) of the source vector.
93 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; All eight mask entries are 5; the undef second operand is never selected.
95 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
96 ret <8 x float> %shuffle