; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s

; Splat byte element 5 of a <32 x i8> vector into every lane. The expected
; AVX lowering shuffles the low 128-bit half with vpshufb and then copies the
; result into the upper half with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat 16-bit element 5 of a <16 x i16> vector into every lane. Element 5
; occupies bytes 10 and 11, so the expected vpshufb mask repeats that byte
; pair before vinsertf128 duplicates the low half into the upper half.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> whose four lanes all hold the scalar argument %q. The
; expected lowering moves the GPR into an XMM register, duplicates the low
; quadword with vmovddup, and mirrors the 128-bit result into the upper half.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Build a <4 x double> whose four lanes all hold the scalar argument %q. The
; argument already arrives in xmm0, so the expected lowering is only the
; vmovddup duplicate followed by vinsertf128 into the upper half.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test that this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; A 32-bit value is loaded from a 32-byte-aligned stack array and inserted into
; lanes 6 and 7 of an otherwise undef <8 x i32>. Because every defined lane
; holds the same loaded value, the expected lowering is a single vbroadcastss
; straight from the stack slot. The frame setup in the expected output
; (pushq/andq $-32/subq) comes from the "align 32" alloca.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       ## BB#0: ## %for_exit499
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ## implicit-def: %YMM0
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne LBB4_2
; CHECK-NEXT:  ## BB#1: ## %load.i1247
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Insert the scalar argument %val into lanes 6 and 7 of an otherwise undef
; <8 x i32> and return it as <8 x float>. The value comes from a register, not
; memory, so the expected lowering is vmovd + vpermilps on the low half
; followed by vinsertf128 into the upper half.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of a <8 x float> vector into every lane. Element 0 is in the
; low 128-bit half, so the expected lowering permutes that half in place and
; then copies it into the upper half.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of a <8 x float> vector into every lane. Element 5 is in the
; upper 128-bit half (index 1 within that half), so the expected lowering
; first extracts the upper half, permutes it, and then re-inserts the result.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Load a <2 x double> and splat its element 1. The expected output keeps the
; full-width load (vmovaps) and duplicates the high element with vmovhlps.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Load a <4 x double> and splat its element 2. The load and shuffle are
; expected to fold into one vbroadcastsd whose displacement (16 bytes) is the
; offset of element 2.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Load a <4 x float> and splat its element 0. The load and shuffle are
; expected to fold into one 128-bit vbroadcastss from the base address.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

165 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
166 ; CHECK-LABEL: splat_load_8f32_77777777:
168 ; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
170 %x = load <8 x float>, <8 x float>* %ptr
171 %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>