1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
3 ; FIXME: use the AVX (VEX-encoded) versions of punpcklbw, punpckhbw and punpckhwd
; funcA: byte splat. The shuffle mask is 32 copies of index 5, so every lane of
; the <32 x i8> result is element 5 of %a. Element 5 sits in the low 16 bytes of
; the vector; the directives below pin the expected instruction sequence.
5 ; CHECK: vextractf128 $0
6 ; CHECK-NEXT: punpcklbw
7 ; CHECK-NEXT: punpckhbw
8 ; CHECK-NEXT: vinsertf128 $1
9 ; CHECK-NEXT: vpermilps $85
10 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; Second operand is undef: only elements of %a are ever selected by the mask.
12 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13 ret <32 x i8> %shuffle
; funcB: 16-bit splat. The shuffle mask is 16 copies of index 5, so every lane of
; the <16 x i16> result is element 5 of %a. Element 5 sits in the low eight
; 16-bit elements of the vector; the directives below pin the expected sequence.
16 ; CHECK: vextractf128 $0
17 ; CHECK-NEXT: punpckhwd
18 ; CHECK-NEXT: vinsertf128 $1
19 ; CHECK-NEXT: vpermilps $85
20 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; Second operand is undef: only elements of %a are ever selected by the mask.
22 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
23 ret <16 x i16> %shuffle
; funcC: broadcast of a scalar i64 argument into all four lanes of a <4 x i64>,
; written as a chain of insertelement instructions rather than a shuffle.
; The expected lowering places a vinsertf128 $1 in the sequence.
28 ; CHECK-NEXT: vinsertf128 $1
29 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; Start from undef and write %q into lanes 0, 1, 2 and 3 in turn.
31 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
32 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
33 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
34 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
35 ret <4 x i64> %vecinit6.i
; funcD: broadcast of a scalar double argument into all four lanes of a
; <4 x double>, written as a chain of insertelement instructions.
; The expected lowering places a vinsertf128 $1 in the sequence.
39 ; CHECK-NEXT: vinsertf128 $1
40 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; Start from undef and write %q into lanes 0, 1, 2 and 3 in turn.
42 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
43 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
44 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
45 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
46 ret <4 x double> %vecinit6.i
49 ; Test this simple opt:
50 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
52 ; shuffle (vload ptr), undef, <1, 1, 1, 1>
; funcE: a 32-bit value is loaded from a stack array and written into lanes 6
; and 7 of an otherwise-undef <8 x i32>, which is then reinterpreted as
; <8 x float> and merged with undef through a phi. The expected lowering
; contains vinsertf128 $1 followed directly by vpermilps $-1.
54 ; CHECK-NEXT: vinsertf128 $1
55 ; CHECK-NEXT: vpermilps $-1
56 define <8 x float> @funcE() nounwind {
; 18x18 float array on the stack, 32-byte aligned; the load below reads from it.
58 %udx495 = alloca [18 x [18 x float]], align 32
59 br label %for_test505.preheader
; Both conditional branches below test undef, so either successor may be taken.
61 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
62 br i1 undef, label %for_exit499, label %for_test505.preheader
64 for_exit499: ; preds = %for_test505.preheader
65 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
67 load.i1247: ; preds = %for_exit499
; Address of element [1][1] of the array, viewed as i32* so the load is integer.
68 %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
69 %ptr.i1237 = bitcast float* %ptr1227 to i32*
70 %val.i1238 = load i32* %ptr.i1237, align 4
; Only lanes 6 and 7 receive the loaded value; lanes 0-5 stay undef.
71 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
72 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
73 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
74 br label %__load_and_broadcast_32.exit1249
76 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
; Result is the built vector when coming from the load block, undef otherwise.
77 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
78 ret <8 x float> %load_broadcast12281250
; funcF: straight-line case with no control flow: a 32-bit value loaded from
; the pointer argument is written into lanes 6 and 7 of an otherwise-undef
; <8 x i32>, then reinterpreted as <8 x float>. The expected lowering places a
; vinsertf128 $1 in the sequence.
82 ; CHECK-NEXT: vinsertf128 $1
83 define <8 x float> @funcF(i32* %ptr) nounwind {
84 %val = load i32* %ptr, align 4
; Only lanes 6 and 7 receive the loaded value; lanes 0-5 stay undef.
85 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
86 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
; Bit-for-bit reinterpretation to the function's <8 x float> return type.
87 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
; funcG: float splat of element 0. The shuffle mask is eight copies of index 0,
; so every lane of the <8 x float> result is element 0 of %a. The expected
; lowering is vinsertf128 $1 followed directly by vpermilps $0.
91 ; CHECK: vinsertf128 $1
92 ; CHECK-NEXT: vpermilps $0
93 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; Second operand is undef: only elements of %a are ever selected by the mask.
95 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
96 ret <8 x float> %shuffle
; funcH: float splat of element 5. The shuffle mask is eight copies of index 5,
; so every lane of the <8 x float> result is element 5 of %a. Element 5 is in
; the upper four elements (4-7) of the vector, at position 1 within that half;
; the expected sequence extracts with immediate $1 and permutes with $85
; (binary 01010101).
99 ; CHECK: vextractf128 $1
100 ; CHECK-NEXT: vinsertf128 $1
101 ; CHECK-NEXT: vpermilps $85
102 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; Second operand is undef: only elements of %a are ever selected by the mask.
104 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
105 ret <8 x float> %shuffle