1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5 ; Check constant loads of every 128-bit and 256-bit vector type
6 ; for size optimization using splat ops available with AVX and AVX2.
8 ; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
; #0 = optsize (see attributes at end of file). Even at optsize the splatted
; <1.0, 1.0> constant is loaded with movddup (duplicate low 64-bit element).
9 define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
10 ; CHECK-LABEL: splat_v2f64:
12 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
13 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
15 %add = fadd <2 x double> %x, <double 1.0, double 1.0>
; #1 = minsize. The 256-bit f64 splat uses vbroadcastsd from memory on both
; AVX and AVX2 (shared CHECK prefix), loading the constant once.
19 define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
20 ; CHECK-LABEL: splat_v4f64:
22 ; CHECK-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
23 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
25 %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
; #0 = optsize. 128-bit f32 splat: vbroadcastss loads the 4-byte constant
; once instead of materializing a full 16-byte vector constant.
29 define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
30 ; CHECK-LABEL: splat_v4f32:
32 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
33 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
35 %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
; #1 = minsize. 256-bit f32 splat: same vbroadcastss form as the 128-bit
; case, just with a ymm destination; identical on AVX and AVX2.
39 define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
40 ; CHECK-LABEL: splat_v8f32:
42 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
43 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
45 %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
49 ; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
50 ; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
; #1 = minsize. Shared CHECK prefix: both AVX and AVX2 pick the FP-domain
; vmovddup for the i64 splat (per the size note above), then vpaddq.
51 define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
52 ; CHECK-LABEL: splat_v2i64:
54 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
55 ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
57 %add = add <2 x i64> %x, <i64 1, i64 1>
61 ; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
62 ; and then we fake it: use vmovddup to splat 64-bit value.
; #0 = optsize. First test where AVX and AVX2 diverge: AVX has no 256-bit
; integer add, so the vector is split into two 128-bit halves sharing one
; vmovddup'd splat; AVX2 does a single vpbroadcastq + 256-bit vpaddq.
63 define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
64 ; AVX-LABEL: splat_v4i64:
66 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
67 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
68 ; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
69 ; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
70 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
73 ; AVX2-LABEL: splat_v4i64:
75 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
76 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
78 %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
82 ; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; #1 = minsize. AVX fakes the i32 splat with the FP-domain vbroadcastss;
; AVX2 has a native integer broadcast (vpbroadcastd).
83 define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
84 ; AVX-LABEL: splat_v4i32:
86 ; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
87 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
90 ; AVX2-LABEL: splat_v4i32:
92 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
93 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
95 %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
99 ; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; #0 = optsize. 256-bit i32 version: AVX splits into two 128-bit halves that
; share a single vbroadcastss'd splat; AVX2 uses vpbroadcastd + 256-bit vpaddd.
100 define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
101 ; AVX-LABEL: splat_v8i32:
103 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
104 ; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
105 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
106 ; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
107 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
110 ; AVX2-LABEL: splat_v8i32:
112 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
113 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
115 %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
119 ; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; #1 = minsize. With no 16-bit broadcast available, AVX folds the full
; constant pool load into vpaddw's memory operand; AVX2 uses vpbroadcastw.
120 define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
121 ; AVX-LABEL: splat_v8i16:
123 ; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
126 ; AVX2-LABEL: splat_v8i16:
128 ; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %xmm1
129 ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
131 %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
135 ; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; #0 = optsize. 256-bit i16 version: AVX materializes the full 16-byte
; constant with vmovdqa (no 16-bit broadcast) and adds it to both halves;
; AVX2 uses a single vpbroadcastw + 256-bit vpaddw.
136 define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
137 ; AVX-LABEL: splat_v16i16:
139 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
140 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
141 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
142 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
143 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
146 ; AVX2-LABEL: splat_v16i16:
148 ; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %ymm1
149 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
151 %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
155 ; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; #1 = minsize. Mirrors the v8i16 case for 8-bit elements: AVX folds the
; constant load into vpaddb; AVX2 uses the native vpbroadcastb.
156 define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
157 ; AVX-LABEL: splat_v16i8:
159 ; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
162 ; AVX2-LABEL: splat_v16i8:
164 ; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %xmm1
165 ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
167 %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
171 ; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; #0 = optsize. 256-bit i8 version: AVX loads the full all-ones-bytes
; constant with vmovdqa and adds it to both 128-bit halves; AVX2 uses a
; single vpbroadcastb + 256-bit vpaddb.
172 define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
173 ; AVX-LABEL: splat_v32i8:
175 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
176 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
177 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
178 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
179 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
182 ; AVX2-LABEL: splat_v32i8:
184 ; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %ymm1
185 ; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
187 %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
191 ; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
192 ; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
193 ; loadi64 with multiple uses.
; Global read by @pr23259 below; <3 x i64> deliberately exercises a
; non-power-of-two vector that gets widened during legalization.
195 @A = common global <3 x i64> zeroinitializer, align 32
; Crash-regression test (PR23259): the interesting property is that this
; compiles at all under minsize (#1); no instruction sequence is pinned here.
197 define <8 x i64> @pr23259() #1 {
199 %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
200 %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
201 %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
202 ret <8 x i64> %shuffle
; The tests alternate between the two size attributes so both optsize and
; minsize code paths are covered for each splat width.
205 attributes #0 = { optsize }
206 attributes #1 = { minsize }