1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-unknown"
6 ; Stack reload folding tests.
8 ; By including a nop call with sideeffects we can force a partial register spill of the
9 ; relevant registers and check that the reload is correctly folded into the instruction.
11 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
12 ;CHECK-LABEL: stack_fold_addpd
13 ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = fadd <2 x double> %a0, %a1
19 define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
20 ;CHECK-LABEL: stack_fold_addpd_ymm
21 ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
22 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
23 %2 = fadd <4 x double> %a0, %a1
27 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
28 ;CHECK-LABEL: stack_fold_addps
29 ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
30 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
31 %2 = fadd <4 x float> %a0, %a1
35 define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
36 ;CHECK-LABEL: stack_fold_addps_ymm
37 ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
38 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
39 %2 = fadd <8 x float> %a0, %a1
43 define double @stack_fold_addsd(double %a0, double %a1) {
44 ;CHECK-LABEL: stack_fold_addsd
45 ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
46 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
47 %2 = fadd double %a0, %a1
51 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
52 ;CHECK-LABEL: stack_fold_addsd_int
53 ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
54 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
55 %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
58 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
60 define float @stack_fold_addss(float %a0, float %a1) {
61 ;CHECK-LABEL: stack_fold_addss
62 ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
63 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
64 %2 = fadd float %a0, %a1
68 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
69 ;CHECK-LABEL: stack_fold_addss_int
70 ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
71 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
72 %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
75 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
77 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
78 ;CHECK-LABEL: stack_fold_addsubpd
79 ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
80 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
81 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
84 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
86 define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
87 ;CHECK-LABEL: stack_fold_addsubpd_ymm
88 ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
89 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
90 %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
93 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
95 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
96 ;CHECK-LABEL: stack_fold_addsubps
97 ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
98 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
99 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
102 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
104 define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
105 ;CHECK-LABEL: stack_fold_addsubps_ymm
106 ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
107 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
108 %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
111 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
113 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
114 ;CHECK-LABEL: stack_fold_andnpd
115 ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
116 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
117 %2 = bitcast <2 x double> %a0 to <2 x i64>
118 %3 = bitcast <2 x double> %a1 to <2 x i64>
119 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
120 %5 = and <2 x i64> %4, %3
121 %6 = bitcast <2 x i64> %5 to <2 x double>
122 ; fadd forces execution domain
123 %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
127 define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
128 ;CHECK-LABEL: stack_fold_andnpd_ymm
129 ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
130 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
131 %2 = bitcast <4 x double> %a0 to <4 x i64>
132 %3 = bitcast <4 x double> %a1 to <4 x i64>
133 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
134 %5 = and <4 x i64> %4, %3
135 %6 = bitcast <4 x i64> %5 to <4 x double>
136 ; fadd forces execution domain
137 %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
141 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
142 ;CHECK-LABEL: stack_fold_andnps
143 ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
145 %2 = bitcast <4 x float> %a0 to <2 x i64>
146 %3 = bitcast <4 x float> %a1 to <2 x i64>
147 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
148 %5 = and <2 x i64> %4, %3
149 %6 = bitcast <2 x i64> %5 to <4 x float>
150 ; fadd forces execution domain
151 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
155 define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
156 ;CHECK-LABEL: stack_fold_andnps_ymm
157 ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
158 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
159 %2 = bitcast <8 x float> %a0 to <4 x i64>
160 %3 = bitcast <8 x float> %a1 to <4 x i64>
161 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
162 %5 = and <4 x i64> %4, %3
163 %6 = bitcast <4 x i64> %5 to <8 x float>
164 ; fadd forces execution domain
165 %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
169 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
170 ;CHECK-LABEL: stack_fold_andpd
171 ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
172 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
173 %2 = bitcast <2 x double> %a0 to <2 x i64>
174 %3 = bitcast <2 x double> %a1 to <2 x i64>
175 %4 = and <2 x i64> %2, %3
176 %5 = bitcast <2 x i64> %4 to <2 x double>
177 ; fadd forces execution domain
178 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
182 define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
183 ;CHECK-LABEL: stack_fold_andpd_ymm
184 ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
185 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
186 %2 = bitcast <4 x double> %a0 to <4 x i64>
187 %3 = bitcast <4 x double> %a1 to <4 x i64>
188 %4 = and <4 x i64> %2, %3
189 %5 = bitcast <4 x i64> %4 to <4 x double>
190 ; fadd forces execution domain
191 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
195 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
196 ;CHECK-LABEL: stack_fold_andps
197 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
198 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
199 %2 = bitcast <4 x float> %a0 to <2 x i64>
200 %3 = bitcast <4 x float> %a1 to <2 x i64>
201 %4 = and <2 x i64> %2, %3
202 %5 = bitcast <2 x i64> %4 to <4 x float>
203 ; fadd forces execution domain
204 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
208 define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
209 ;CHECK-LABEL: stack_fold_andps_ymm
210 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
211 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
212 %2 = bitcast <8 x float> %a0 to <4 x i64>
213 %3 = bitcast <8 x float> %a1 to <4 x i64>
214 %4 = and <4 x i64> %2, %3
215 %5 = bitcast <4 x i64> %4 to <8 x float>
216 ; fadd forces execution domain
217 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
221 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
222 ;CHECK-LABEL: stack_fold_blendpd
223 ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
224 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
225 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
229 define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) {
230 ;CHECK-LABEL: stack_fold_blendpd_ymm
231 ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
233 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1
237 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
238 ;CHECK-LABEL: stack_fold_blendps
239 ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
240 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
241 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
245 define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) {
246 ;CHECK-LABEL: stack_fold_blendps_ymm
247 ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
248 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
249 %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1
253 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
254 ;CHECK-LABEL: stack_fold_blendvpd
255 ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
256 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
257 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
260 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
262 define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) {
263 ;CHECK-LABEL: stack_fold_blendvpd_ymm
264 ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
265 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
266 %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0)
269 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
271 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
272 ;CHECK-LABEL: stack_fold_blendvps
273 ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
274 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
275 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
278 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
280 define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) {
281 ;CHECK-LABEL: stack_fold_blendvps_ymm
282 ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
283 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
284 %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0)
287 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
289 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
290 ;CHECK-LABEL: stack_fold_cmppd
291 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
292 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
293 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
296 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
298 define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
299 ;CHECK-LABEL: stack_fold_cmppd_ymm
300 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
301 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
302 %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
305 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
307 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
308 ;CHECK-LABEL: stack_fold_cmpps
309 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
310 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
311 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
314 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
316 define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
317 ;CHECK-LABEL: stack_fold_cmpps_ymm
318 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
319 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
320 %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
323 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
325 define i32 @stack_fold_cmpsd(double %a0, double %a1) {
326 ;CHECK-LABEL: stack_fold_cmpsd
327 ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
329 %2 = fcmp oeq double %a0, %a1
330 %3 = zext i1 %2 to i32
334 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
335 ;CHECK-LABEL: stack_fold_cmpsd_int
336 ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
337 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
338 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
341 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
343 define i32 @stack_fold_cmpss(float %a0, float %a1) {
344 ;CHECK-LABEL: stack_fold_cmpss
345 ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
346 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
347 %2 = fcmp oeq float %a0, %a1
348 %3 = zext i1 %2 to i32
352 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
353 ;CHECK-LABEL: stack_fold_cmpss_int
354 ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
355 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
356 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
359 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
361 ; TODO stack_fold_comisd
363 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
364 ;CHECK-LABEL: stack_fold_comisd_int
365 ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
366 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
367 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
370 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
372 ; TODO stack_fold_comiss
374 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
375 ;CHECK-LABEL: stack_fold_comiss_int
376 ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
377 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
378 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
381 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
383 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
384 ;CHECK-LABEL: stack_fold_cvtdq2pd
385 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
386 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
387 %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
390 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
392 define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
393 ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
394 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
395 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
396 %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
399 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
401 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
402 ;CHECK-LABEL: stack_fold_cvtdq2ps
403 ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
404 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
405 %2 = sitofp <4 x i32> %a0 to <4 x float>
409 define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
410 ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm
411 ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
412 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
413 %2 = sitofp <8 x i32> %a0 to <8 x float>
417 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
418 ;CHECK-LABEL: stack_fold_cvtpd2dq
419 ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
420 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
421 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
424 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
426 define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
427 ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm
428 ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
429 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
430 %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
433 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
435 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
436 ;CHECK-LABEL: stack_fold_cvtpd2ps
437 ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
438 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
439 %2 = fptrunc <2 x double> %a0 to <2 x float>
443 define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
444 ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
445 ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
446 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
447 %2 = fptrunc <4 x double> %a0 to <4 x float>
451 define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
452 ;CHECK-LABEL: stack_fold_cvtph2ps
453 ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
454 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
455 %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
458 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
460 define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
461 ;CHECK-LABEL: stack_fold_cvtph2ps_ymm
462 ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
463 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
464 %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
467 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
469 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
470 ;CHECK-LABEL: stack_fold_cvtps2dq
471 ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
472 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
473 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
476 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
478 define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
479 ;CHECK-LABEL: stack_fold_cvtps2dq_ymm
480 ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
481 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
482 %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
485 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
487 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
488 ;CHECK-LABEL: stack_fold_cvtps2pd
489 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
490 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
491 %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
494 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
496 define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
497 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm
498 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
499 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
500 %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
503 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
505 define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
506 ;CHECK-LABEL: stack_fold_cvtps2ph
507 ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
508 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
509 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
512 declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
514 define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
515 ;CHECK-LABEL: stack_fold_cvtps2ph_ymm
516 ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
517 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
518 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
521 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
523 ; TODO stack_fold_cvtsd2si
525 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
526 ;CHECK-LABEL: stack_fold_cvtsd2si_int
527 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
528 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
529 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
532 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
534 ; TODO stack_fold_cvtsd2si64
536 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
537 ;CHECK-LABEL: stack_fold_cvtsd2si64_int
538 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
540 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
543 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
545 ; TODO stack_fold_cvtsd2ss
547 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) {
548 ;CHECK-LABEL: stack_fold_cvtsd2ss_int
549 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
550 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
551 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
554 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
556 define double @stack_fold_cvtsi2sd(i32 %a0) {
557 ;CHECK-LABEL: stack_fold_cvtsi2sd
558 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
559 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
560 %2 = sitofp i32 %a0 to double
564 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
565 ;CHECK-LABEL: stack_fold_cvtsi2sd_int
566 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
567 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
568 %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
571 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
573 define double @stack_fold_cvtsi642sd(i64 %a0) {
574 ;CHECK-LABEL: stack_fold_cvtsi642sd
575 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
576 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
577 %2 = sitofp i64 %a0 to double
581 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
582 ;CHECK-LABEL: stack_fold_cvtsi642sd_int
583 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
585 %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
588 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
590 define float @stack_fold_cvtsi2ss(i32 %a0) {
591 ;CHECK-LABEL: stack_fold_cvtsi2ss
592 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
593 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
594 %2 = sitofp i32 %a0 to float
598 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
599 ;CHECK-LABEL: stack_fold_cvtsi2ss_int
600 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
601 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
602 %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
605 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
607 define float @stack_fold_cvtsi642ss(i64 %a0) {
608 ;CHECK-LABEL: stack_fold_cvtsi642ss
609 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
610 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
611 %2 = sitofp i64 %a0 to float
615 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
616 ;CHECK-LABEL: stack_fold_cvtsi642ss_int
617 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
618 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
619 %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
622 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
624 ; TODO stack_fold_cvtss2sd
626 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) {
627 ;CHECK-LABEL: stack_fold_cvtss2sd_int
628 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
629 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
630 %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
633 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
635 ; TODO stack_fold_cvtss2si
637 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
638 ;CHECK-LABEL: stack_fold_cvtss2si_int
639 ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
640 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
641 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
644 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
646 ; TODO stack_fold_cvtss2si64
648 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
649 ;CHECK-LABEL: stack_fold_cvtss2si64_int
650 ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
651 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
652 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
655 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
657 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
658 ;CHECK-LABEL: stack_fold_cvttpd2dq
659 ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
660 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
661 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
664 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
666 define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) {
667 ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm
668 ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
669 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
670 %2 = fptosi <4 x double> %a0 to <4 x i32>
674 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
675 ;CHECK-LABEL: stack_fold_cvttps2dq
676 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
677 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
678 %2 = fptosi <4 x float> %a0 to <4 x i32>
682 define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) {
683 ;CHECK-LABEL: stack_fold_cvttps2dq_ymm
684 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
685 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
686 %2 = fptosi <8 x float> %a0 to <8 x i32>
690 define i32 @stack_fold_cvttsd2si(double %a0) {
691 ;CHECK-LABEL: stack_fold_cvttsd2si
692 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
693 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
694 %2 = fptosi double %a0 to i32
698 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
699 ;CHECK-LABEL: stack_fold_cvttsd2si_int
700 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
701 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
702 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
705 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
707 define i64 @stack_fold_cvttsd2si64(double %a0) {
708 ;CHECK-LABEL: stack_fold_cvttsd2si64
709 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
710 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
711 %2 = fptosi double %a0 to i64
715 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
716 ;CHECK-LABEL: stack_fold_cvttsd2si64_int
717 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
718 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
719 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
722 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
724 define i32 @stack_fold_cvttss2si(float %a0) {
725 ;CHECK-LABEL: stack_fold_cvttss2si
726 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
727 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
728 %2 = fptosi float %a0 to i32
732 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
733 ;CHECK-LABEL: stack_fold_cvttss2si_int
734 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
735 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
736 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
739 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
741 define i64 @stack_fold_cvttss2si64(float %a0) {
742 ;CHECK-LABEL: stack_fold_cvttss2si64
743 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
744 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
745 %2 = fptosi float %a0 to i64
749 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
750 ;CHECK-LABEL: stack_fold_cvttss2si64_int
751 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
753 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
756 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
758 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
759 ;CHECK-LABEL: stack_fold_divpd
760 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
762 %2 = fdiv <2 x double> %a0, %a1
766 define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
767 ;CHECK-LABEL: stack_fold_divpd_ymm
768 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
769 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
770 %2 = fdiv <4 x double> %a0, %a1
774 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
775 ;CHECK-LABEL: stack_fold_divps
776 ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
777 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
778 %2 = fdiv <4 x float> %a0, %a1
782 define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
783 ;CHECK-LABEL: stack_fold_divps_ymm
784 ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
786 %2 = fdiv <8 x float> %a0, %a1
790 define double @stack_fold_divsd(double %a0, double %a1) {
791 ;CHECK-LABEL: stack_fold_divsd
792 ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
793 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
794 %2 = fdiv double %a0, %a1
798 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
799 ;CHECK-LABEL: stack_fold_divsd_int
800 ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
801 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
802 %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
805 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
807 define float @stack_fold_divss(float %a0, float %a1) {
808 ;CHECK-LABEL: stack_fold_divss
809 ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
810 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
811 %2 = fdiv float %a0, %a1
815 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
816 ;CHECK-LABEL: stack_fold_divss_int
817 ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
818 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
819 %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
822 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
824 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
825 ;CHECK-LABEL: stack_fold_dppd
826 ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
827 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
828 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
831 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
833 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
834 ;CHECK-LABEL: stack_fold_dpps
835 ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
836 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
837 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
840 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
842 define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
843 ;CHECK-LABEL: stack_fold_dpps_ymm
844 ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
845 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
846 %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
849 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
851 define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
852 ;CHECK-LABEL: stack_fold_extractf128
853 ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
854 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
855 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
859 define i32 @stack_fold_extractps(<4 x float> %a0) {
860 ;CHECK-LABEL: stack_fold_extractps
861 ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
862 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
863 %1 = extractelement <4 x float> %a0, i32 1
864 %2 = bitcast float %1 to i32
865 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
869 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
870 ;CHECK-LABEL: stack_fold_haddpd
871 ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
872 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
873 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
876 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
878 define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
879 ;CHECK-LABEL: stack_fold_haddpd_ymm
880 ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
881 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
882 %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
885 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
887 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
888 ;CHECK-LABEL: stack_fold_haddps
889 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
890 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
891 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
894 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
896 define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
897 ;CHECK-LABEL: stack_fold_haddps_ymm
898 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
899 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
900 %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
903 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
905 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
906 ;CHECK-LABEL: stack_fold_hsubpd
907 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
908 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
909 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
912 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
914 define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
915 ;CHECK-LABEL: stack_fold_hsubpd_ymm
916 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
917 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
918 %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
921 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
923 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
924 ;CHECK-LABEL: stack_fold_hsubps
925 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
926 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
927 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
930 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
932 define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
933 ;CHECK-LABEL: stack_fold_hsubps_ymm
934 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
935 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
936 %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
939 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
941 define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
942 ;CHECK-LABEL: stack_fold_insertf128
943 ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
944 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
945 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
949 define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
950 ;CHECK-LABEL: stack_fold_insertps
951 ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
952 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
953 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
954 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
957 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
959 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
960 ;CHECK-LABEL: stack_fold_maxpd
961 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
963 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
966 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
968 define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) {
969 ;CHECK-LABEL: stack_fold_maxpd_ymm
970 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
971 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
972 %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
975 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
977 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
978 ;CHECK-LABEL: stack_fold_maxps
979 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
980 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
981 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
984 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
986 define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) {
987 ;CHECK-LABEL: stack_fold_maxps_ymm
988 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
989 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
990 %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
993 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
995 define double @stack_fold_maxsd(double %a0, double %a1) {
996 ;CHECK-LABEL: stack_fold_maxsd
997 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
998 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
999 %2 = fcmp ogt double %a0, %a1
1000 %3 = select i1 %2, double %a0, double %a1
1004 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
1005 ;CHECK-LABEL: stack_fold_maxsd_int
1006 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1007 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1008 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1011 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1013 define float @stack_fold_maxss(float %a0, float %a1) {
1014 ;CHECK-LABEL: stack_fold_maxss
1015 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1016 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1017 %2 = fcmp ogt float %a0, %a1
1018 %3 = select i1 %2, float %a0, float %a1
1022 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
1023 ;CHECK-LABEL: stack_fold_maxss_int
1024 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1025 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1026 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1029 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1031 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
1032 ;CHECK-LABEL: stack_fold_minpd
1033 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1034 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1035 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1038 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1040 define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1041 ;CHECK-LABEL: stack_fold_minpd_ymm
1042 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1043 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1044 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
1047 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
1049 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
1050 ;CHECK-LABEL: stack_fold_minps
1051 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1052 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1053 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1056 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1058 define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) {
1059 ;CHECK-LABEL: stack_fold_minps_ymm
1060 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1061 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1062 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
1065 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
1067 define double @stack_fold_minsd(double %a0, double %a1) {
1068 ;CHECK-LABEL: stack_fold_minsd
1069 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1070 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1071 %2 = fcmp olt double %a0, %a1
1072 %3 = select i1 %2, double %a0, double %a1
1076 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
1077 ;CHECK-LABEL: stack_fold_minsd_int
1078 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1079 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1080 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1083 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1085 define float @stack_fold_minss(float %a0, float %a1) {
1086 ;CHECK-LABEL: stack_fold_minss
1087 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1089 %2 = fcmp olt float %a0, %a1
1090 %3 = select i1 %2, float %a0, float %a1
1094 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
1095 ;CHECK-LABEL: stack_fold_minss_int
1096 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1097 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1098 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1101 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1103 define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
1104 ;CHECK-LABEL: stack_fold_movddup
1105 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1106 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1107 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
1111 define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) {
1112 ;CHECK-LABEL: stack_fold_movddup_ymm
1113 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1114 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1115 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1119 ; TODO stack_fold_movhpd (load / store)
1120 ; TODO stack_fold_movhps (load / store)
1122 ; TODO stack_fold_movlpd (load / store)
1123 ; TODO stack_fold_movlps (load / store)
1125 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
1126 ;CHECK-LABEL: stack_fold_movshdup
1127 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1129 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1133 define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) {
1134 ;CHECK-LABEL: stack_fold_movshdup_ymm
1135 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1137 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
1141 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
1142 ;CHECK-LABEL: stack_fold_movsldup
1143 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1145 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1149 define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) {
1150 ;CHECK-LABEL: stack_fold_movsldup_ymm
1151 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1152 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1153 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1157 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
1158 ;CHECK-LABEL: stack_fold_mulpd
1159 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1160 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1161 %2 = fmul <2 x double> %a0, %a1
1165 define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1166 ;CHECK-LABEL: stack_fold_mulpd_ymm
1167 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1168 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1169 %2 = fmul <4 x double> %a0, %a1
1173 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
1174 ;CHECK-LABEL: stack_fold_mulps
1175 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1176 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1177 %2 = fmul <4 x float> %a0, %a1
1181 define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
1182 ;CHECK-LABEL: stack_fold_mulps_ymm
1183 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1184 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1185 %2 = fmul <8 x float> %a0, %a1
1189 define double @stack_fold_mulsd(double %a0, double %a1) {
1190 ;CHECK-LABEL: stack_fold_mulsd
1191 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1192 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1193 %2 = fmul double %a0, %a1
1197 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
1198 ;CHECK-LABEL: stack_fold_mulsd_int
1199 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1200 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1201 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
1204 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
1206 define float @stack_fold_mulss(float %a0, float %a1) {
1207 ;CHECK-LABEL: stack_fold_mulss
1208 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1209 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1210 %2 = fmul float %a0, %a1
1214 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
1215 ;CHECK-LABEL: stack_fold_mulss_int
1216 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1217 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1218 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
1221 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
1223 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
1224 ;CHECK-LABEL: stack_fold_orpd
1225 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1226 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1227 %2 = bitcast <2 x double> %a0 to <2 x i64>
1228 %3 = bitcast <2 x double> %a1 to <2 x i64>
1229 %4 = or <2 x i64> %2, %3
1230 %5 = bitcast <2 x i64> %4 to <2 x double>
1231 ; fadd forces execution domain
1232 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
1236 define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1237 ;CHECK-LABEL: stack_fold_orpd_ymm
1238 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1239 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1240 %2 = bitcast <4 x double> %a0 to <4 x i64>
1241 %3 = bitcast <4 x double> %a1 to <4 x i64>
1242 %4 = or <4 x i64> %2, %3
1243 %5 = bitcast <4 x i64> %4 to <4 x double>
1244 ; fadd forces execution domain
1245 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
1249 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
1250 ;CHECK-LABEL: stack_fold_orps
1251 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1253 %2 = bitcast <4 x float> %a0 to <2 x i64>
1254 %3 = bitcast <4 x float> %a1 to <2 x i64>
1255 %4 = or <2 x i64> %2, %3
1256 %5 = bitcast <2 x i64> %4 to <4 x float>
1257 ; fadd forces execution domain
1258 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
1262 define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) {
1263 ;CHECK-LABEL: stack_fold_orps_ymm
1264 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1265 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1266 %2 = bitcast <8 x float> %a0 to <4 x i64>
1267 %3 = bitcast <8 x float> %a1 to <4 x i64>
1268 %4 = or <4 x i64> %2, %3
1269 %5 = bitcast <4 x i64> %4 to <8 x float>
1270 ; fadd forces execution domain
1271 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
1275 define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) {
1276 ;CHECK-LABEL: stack_fold_perm2f128
1277 ;CHECK: vperm2f128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1278 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1279 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1283 define <2 x double> @stack_fold_permilpd(<2 x double> %a0) {
1284 ;CHECK-LABEL: stack_fold_permilpd
1285 ;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1286 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1287 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1291 define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) {
1292 ;CHECK-LABEL: stack_fold_permilpd_ymm
1293 ;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1294 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1295 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
1299 define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) {
1300 ;CHECK-LABEL: stack_fold_permilpdvar
1301 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1302 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1303 %2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
1306 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
1308 define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) {
1309 ;CHECK-LABEL: stack_fold_permilpdvar_ymm
1310 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1312 %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
1315 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
1317 define <4 x float> @stack_fold_permilps(<4 x float> %a0) {
1318 ;CHECK-LABEL: stack_fold_permilps
1319 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1320 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1321 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1325 define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) {
1326 ;CHECK-LABEL: stack_fold_permilps_ymm
1327 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1329 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
1333 define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) {
1334 ;CHECK-LABEL: stack_fold_permilpsvar
1335 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1336 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1337 %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
1340 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
1342 define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) {
1343 ;CHECK-LABEL: stack_fold_permilpsvar_ymm
1344 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1345 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1346 %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
1349 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
1351 ; TODO stack_fold_rcpps
1353 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
1354 ;CHECK-LABEL: stack_fold_rcpps_int
1355 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1356 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1357 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
1360 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1362 ; TODO stack_fold_rcpps_ymm
1364 define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) {
1365 ;CHECK-LABEL: stack_fold_rcpps_ymm_int
1366 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1368 %2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
1371 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
1373 ; TODO stack_fold_rcpss
1375 define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) {
1376 ;CHECK-LABEL: stack_fold_rcpss_int
1377 ;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1378 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1379 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
1382 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
1384 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
1385 ;CHECK-LABEL: stack_fold_roundpd
1386 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1387 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1388 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
1391 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1393 define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
1394 ;CHECK-LABEL: stack_fold_roundpd_ymm
1395 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1396 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1397 %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
1400 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
1402 define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
1403 ;CHECK-LABEL: stack_fold_roundps
1404 ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1406 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
1409 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1411 define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
1412 ;CHECK-LABEL: stack_fold_roundps_ymm
1413 ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1414 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1415 %2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
1418 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
1420 define double @stack_fold_roundsd(double %a0) optsize {
1421 ;CHECK-LABEL: stack_fold_roundsd
1422 ;CHECK: vroundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1424 %2 = call double @llvm.floor.f64(double %a0)
1427 declare double @llvm.floor.f64(double) nounwind readnone
1429 ; TODO stack_fold_roundsd_int
1430 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1432 define float @stack_fold_roundss(float %a0) optsize {
1433 ;CHECK-LABEL: stack_fold_roundss
1434 ;CHECK: vroundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1435 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1436 %2 = call float @llvm.floor.f32(float %a0)
1439 declare float @llvm.floor.f32(float) nounwind readnone
1441 ; TODO stack_fold_roundss_int
1442 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1444 ; TODO stack_fold_rsqrtps
1446 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
1447 ;CHECK-LABEL: stack_fold_rsqrtps_int
1448 ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1449 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1450 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
1453 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
1455 ; TODO stack_fold_rsqrtps_ymm
1457 define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
1458 ;CHECK-LABEL: stack_fold_rsqrtps_ymm_int
1459 ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1460 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1461 %2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
1464 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
1466 ; TODO stack_fold_rsqrtss
1468 define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0) {
1469 ;CHECK-LABEL: stack_fold_rsqrtss_int
1470 ;CHECK: vrsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1471 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1472 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
1475 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
1477 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
1478 ;CHECK-LABEL: stack_fold_shufpd
1479 ;CHECK: vshufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1481 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
1485 define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1486 ;CHECK-LABEL: stack_fold_shufpd_ymm
1487 ;CHECK: vshufpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1488 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1489 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
1493 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
1494 ;CHECK-LABEL: stack_fold_shufps
1495 ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1497 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
1501 define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
1502 ;CHECK-LABEL: stack_fold_shufps_ymm
1503 ;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1504 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1505 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14>
1509 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
1510 ;CHECK-LABEL: stack_fold_sqrtpd
1511 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1512 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1513 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
1516 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
1518 define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
1519 ;CHECK-LABEL: stack_fold_sqrtpd_ymm
1520 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1521 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1522 %2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
1525 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
1527 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
1528 ;CHECK-LABEL: stack_fold_sqrtps
1529 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1530 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1531 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
1534 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
1536 define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
1537 ;CHECK-LABEL: stack_fold_sqrtps_ymm
1538 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1540 %2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
1543 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
1545 define double @stack_fold_sqrtsd(double %a0) {
1546 ;CHECK-LABEL: stack_fold_sqrtsd
1547 ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1548 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1549 %2 = call double @llvm.sqrt.f64(double %a0)
1552 declare double @llvm.sqrt.f64(double) nounwind readnone
1554 define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) {
1555 ;CHECK-LABEL: stack_fold_sqrtsd_int
1556 ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1557 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1558 %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
1561 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
1563 define float @stack_fold_sqrtss(float %a0) {
1564 ;CHECK-LABEL: stack_fold_sqrtss
1565 ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1566 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1567 %2 = call float @llvm.sqrt.f32(float %a0)
1570 declare float @llvm.sqrt.f32(float) nounwind readnone
1572 define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) {
1573 ;CHECK-LABEL: stack_fold_sqrtss_int
1574 ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1575 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1576 %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
1579 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
1581 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
1582 ;CHECK-LABEL: stack_fold_subpd
1583 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1585 %2 = fsub <2 x double> %a0, %a1
1589 define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1590 ;CHECK-LABEL: stack_fold_subpd_ymm
1591 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1592 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1593 %2 = fsub <4 x double> %a0, %a1
1597 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
1598 ;CHECK-LABEL: stack_fold_subps
1599 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1600 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1601 %2 = fsub <4 x float> %a0, %a1
1605 define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
1606 ;CHECK-LABEL: stack_fold_subps_ymm
1607 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1608 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1609 %2 = fsub <8 x float> %a0, %a1
1613 define double @stack_fold_subsd(double %a0, double %a1) {
1614 ;CHECK-LABEL: stack_fold_subsd
1615 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1616 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1617 %2 = fsub double %a0, %a1
1621 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
1622 ;CHECK-LABEL: stack_fold_subsd_int
1623 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1624 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1625 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
1628 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
1630 define float @stack_fold_subss(float %a0, float %a1) {
1631 ;CHECK-LABEL: stack_fold_subss
1632 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1633 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1634 %2 = fsub float %a0, %a1
1638 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
1639 ;CHECK-LABEL: stack_fold_subss_int
1640 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1641 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1642 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
1645 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
1647 define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
1648 ;CHECK-LABEL: stack_fold_testpd
1649 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1650 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1651 %2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
1654 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
1656 define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1657 ;CHECK-LABEL: stack_fold_testpd_ymm
1658 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1659 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1660 %2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
1663 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
1665 define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) {
1666 ;CHECK-LABEL: stack_fold_testps
1667 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1668 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1669 %2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
1672 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
1674 define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) {
1675 ;CHECK-LABEL: stack_fold_testps_ymm
1676 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1677 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1678 %2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
1681 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
1683 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
1684 ;CHECK-LABEL: stack_fold_ucomisd
1685 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1686 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1687 %2 = fcmp ueq double %a0, %a1
1688 %3 = select i1 %2, i32 1, i32 -1
1692 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
1693 ;CHECK-LABEL: stack_fold_ucomisd_int
1694 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1695 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1696 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
1699 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1701 define i32 @stack_fold_ucomiss(float %a0, float %a1) {
1702 ;CHECK-LABEL: stack_fold_ucomiss
1703 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1704 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1705 %2 = fcmp ueq float %a0, %a1
1706 %3 = select i1 %2, i32 1, i32 -1
1710 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
1711 ;CHECK-LABEL: stack_fold_ucomiss_int
1712 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1713 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1714 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
1717 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
1719 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
1720 ;CHECK-LABEL: stack_fold_unpckhpd
1721 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1722 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1723 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
1724 ; fadd forces execution domain
1725 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
1729 define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1730 ;CHECK-LABEL: stack_fold_unpckhpd_ymm
1731 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1732 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1733 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1734 ; fadd forces execution domain
1735 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
1739 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
1740 ;CHECK-LABEL: stack_fold_unpckhps
1741 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1742 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1743 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1744 ; fadd forces execution domain
1745 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
1749 define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) {
1750 ;CHECK-LABEL: stack_fold_unpckhps_ymm
1751 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1753 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1754 ; fadd forces execution domain
1755 %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
1759 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
1760 ;CHECK-LABEL: stack_fold_unpcklpd
1761 ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1762 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1763 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
1764 ; fadd forces execution domain
1765 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
1769 define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1770 ;CHECK-LABEL: stack_fold_unpcklpd_ymm
1771 ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1772 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1773 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1774 ; fadd forces execution domain
1775 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
1779 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
1780 ;CHECK-LABEL: stack_fold_unpcklps
1781 ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1783 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1784 ; fadd forces execution domain
1785 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
1789 define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
1790 ;CHECK-LABEL: stack_fold_unpcklps_ymm
1791 ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1792 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1793 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
1794 ; fadd forces execution domain
1795 %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
1799 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
1800 ;CHECK-LABEL: stack_fold_xorpd
1801 ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1802 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1803 %2 = bitcast <2 x double> %a0 to <2 x i64>
1804 %3 = bitcast <2 x double> %a1 to <2 x i64>
1805 %4 = xor <2 x i64> %2, %3
1806 %5 = bitcast <2 x i64> %4 to <2 x double>
1807 ; fadd forces execution domain
1808 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
1812 define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
1813 ;CHECK-LABEL: stack_fold_xorpd_ymm
1814 ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1815 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1816 %2 = bitcast <4 x double> %a0 to <4 x i64>
1817 %3 = bitcast <4 x double> %a1 to <4 x i64>
1818 %4 = xor <4 x i64> %2, %3
1819 %5 = bitcast <4 x i64> %4 to <4 x double>
1820 ; fadd forces execution domain
1821 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
1825 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
1826 ;CHECK-LABEL: stack_fold_xorps
1827 ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1828 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1829 %2 = bitcast <4 x float> %a0 to <2 x i64>
1830 %3 = bitcast <4 x float> %a1 to <2 x i64>
1831 %4 = xor <2 x i64> %2, %3
1832 %5 = bitcast <2 x i64> %4 to <4 x float>
1833 ; fadd forces execution domain
1834 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
1838 define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
1839 ;CHECK-LABEL: stack_fold_xorps_ymm
1840 ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
1841 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1842 %2 = bitcast <8 x float> %a0 to <4 x i64>
1843 %3 = bitcast <8 x float> %a1 to <4 x i64>
1844 %4 = xor <4 x i64> %2, %3
1845 %5 = bitcast <4 x i64> %4 to <8 x float>
1846 ; fadd forces execution domain
1847 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>