1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
3 ; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
5 ; CHECK: vextractf128 $0
6 ; CHECK-NEXT: punpcklbw
7 ; CHECK-NEXT: punpckhbw
8 ; CHECK-NEXT: vinsertf128 $1
9 ; CHECK-NEXT: vpermilps $85
; funcA: splat of a single byte. The shuffle mask is 5 in all 32 positions, so
; every lane of the <32 x i8> result is element 5 of %a. The FileCheck
; directives immediately above this function list the expected instruction
; sequence for this splat.
10 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only elements of %a are selected.
12 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13 ret <32 x i8> %shuffle
16 ; CHECK: vextractf128 $0
17 ; CHECK-NEXT: punpckhwd
18 ; CHECK-NEXT: vinsertf128 $1
19 ; CHECK-NEXT: vpermilps $85
; funcB: splat of a single 16-bit element. The shuffle mask is 5 in all 16
; positions, so every lane of the <16 x i16> result is element 5 of %a.
20 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only elements of %a are selected.
22 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
23 ret <16 x i16> %shuffle
27 ; CHECK-NEXT: vinsertf128 $1
28 ; CHECK-NEXT: vpermilps $0
; funcC: splat of a scalar i64. The same value %q is inserted into lanes 0
; through 3 of a <4 x i64>, one insertelement per lane, starting from undef.
29 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
31 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
32 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
33 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
34 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
; All four lanes now hold %q.
35 ret <4 x i64> %vecinit6.i
38 ; CHECK: vinsertf128 $1
39 ; CHECK-NEXT: vpermilps $0
; funcD: splat of a scalar double. The same value %q is inserted into lanes 0
; through 3 of a <4 x double>, one insertelement per lane, starting from undef.
40 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
42 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
43 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
44 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
45 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
; All four lanes now hold %q.
46 ret <4 x double> %vecinit6.i
49 ; Test this simple optimization, which rewrites
50 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
52 ; into shuffle (vload ptr), undef, <1, 1, 1, 1>
54 ; CHECK-NEXT: vinsertf128 $1
55 ; CHECK-NEXT: vpermilps $-1
; funcE: takes no arguments. On one path it loads a single 32-bit value from a
; stack array and places it in lanes 6 and 7 of an otherwise-undef <8 x i32>,
; which is returned reinterpreted as <8 x float>. On the other path the result
; is undef. Both conditional branches use undef conditions.
56 define <8 x float> @funcE() nounwind {
; 18 x 18 float array on the stack, 32-byte aligned.
58 %udx495 = alloca [18 x [18 x float]], align 32
59 br label %for_test505.preheader
61 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
; Self-loop; the exit condition is undef.
62 br i1 undef, label %for_exit499, label %for_test505.preheader
64 for_exit499: ; preds = %for_test505.preheader
; Either skip the load (the phi below then yields undef) or take the load path.
65 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
67 load.i1247: ; preds = %for_exit499
; Address of array element [1][1], reinterpreted as an i32 pointer for the load.
68 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
69 %ptr.i1237 = bitcast float* %ptr1227 to i32*
70 %val.i1238 = load i32* %ptr.i1237, align 4
; The loaded value goes into lanes 6 and 7; lanes 0 through 5 remain undef.
71 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
72 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
73 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
74 br label %__load_and_broadcast_32.exit1249
76 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
; Merge point: the vector built on the load path, or undef when it was skipped.
77 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
78 ret <8 x float> %load_broadcast12281250
81 ; CHECK: vinsertf128 $1
82 ; CHECK-NEXT: vpermilps $0
; funcF: inserts the scalar argument %val into lanes 6 and 7 of an otherwise
; undef <8 x i32> and reinterprets the vector as <8 x float>.
; NOTE(review): no ret instruction is visible after the bitcast in this
; listing; confirm that the function returns %tmp.
83 define <8 x float> @funcF(i32 %val) nounwind {
; Lanes 0 through 5 are never written and stay undef.
84 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
85 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
86 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
90 ; CHECK: vinsertf128 $1
91 ; CHECK-NEXT: vpermilps $0
; funcG: splat of element 0. The shuffle mask is 0 in all 8 positions, so every
; lane of the <8 x float> result is element 0 of %a.
92 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only elements of %a are selected.
94 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
95 ret <8 x float> %shuffle
98 ; CHECK: vextractf128 $1
99 ; CHECK-NEXT: vinsertf128 $1
100 ; CHECK-NEXT: vpermilps $85
; funcH: splat of element 5. The shuffle mask is 5 in all 8 positions, so every
; lane of the <8 x float> result is element 5 of %a. With eight 32-bit lanes,
; element 5 sits in the upper four lanes of the source vector.
101 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; Second shuffle operand is undef: only elements of %a are selected.
103 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
104 ret <8 x float> %shuffle