1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2 < %s | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-unknown"
6 ; Stack reload folding tests.
8 ; By including a nop call with sideeffects we can force a partial register spill of the
9 ; relevant registers and check that the reload is correctly folded into the instruction.
11 define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
12 ;CHECK-LABEL: stack_fold_addpd
13 ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15 %2 = fadd <2 x double> %a0, %a1
19 define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
20 ;CHECK-LABEL: stack_fold_addps
21 ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
22 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
23 %2 = fadd <4 x float> %a0, %a1
27 define double @stack_fold_addsd(double %a0, double %a1) {
28 ;CHECK-LABEL: stack_fold_addsd
29 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
30 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
31 %2 = fadd double %a0, %a1
35 define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
36 ;CHECK-LABEL: stack_fold_addsd_int
37 ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
38 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
39 %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
42 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
44 define float @stack_fold_addss(float %a0, float %a1) {
45 ;CHECK-LABEL: stack_fold_addss
46 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
47 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
48 %2 = fadd float %a0, %a1
52 define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
53 ;CHECK-LABEL: stack_fold_addss_int
54 ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
55 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
56 %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
59 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
61 define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
62 ;CHECK-LABEL: stack_fold_addsubpd
63 ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
64 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
65 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
68 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
70 define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
71 ;CHECK-LABEL: stack_fold_addsubps
72 ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
73 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
74 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
77 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
79 define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
80 ;CHECK-LABEL: stack_fold_andnpd
81 ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
82 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
83 %2 = bitcast <2 x double> %a0 to <2 x i64>
84 %3 = bitcast <2 x double> %a1 to <2 x i64>
85 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
86 %5 = and <2 x i64> %4, %3
87 %6 = bitcast <2 x i64> %5 to <2 x double>
88 ; fadd forces execution domain
89 %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
93 define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
94 ;CHECK-LABEL: stack_fold_andnps
95 ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
96 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
97 %2 = bitcast <4 x float> %a0 to <2 x i64>
98 %3 = bitcast <4 x float> %a1 to <2 x i64>
99 %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
100 %5 = and <2 x i64> %4, %3
101 %6 = bitcast <2 x i64> %5 to <4 x float>
102 ; fadd forces execution domain
103 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
107 define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
108 ;CHECK-LABEL: stack_fold_andpd
109 ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
110 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
111 %2 = bitcast <2 x double> %a0 to <2 x i64>
112 %3 = bitcast <2 x double> %a1 to <2 x i64>
113 %4 = and <2 x i64> %2, %3
114 %5 = bitcast <2 x i64> %4 to <2 x double>
115 ; fadd forces execution domain
116 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
120 define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
121 ;CHECK-LABEL: stack_fold_andps
122 ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
123 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
124 %2 = bitcast <4 x float> %a0 to <2 x i64>
125 %3 = bitcast <4 x float> %a1 to <2 x i64>
126 %4 = and <2 x i64> %2, %3
127 %5 = bitcast <2 x i64> %4 to <4 x float>
128 ; fadd forces execution domain
129 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
133 define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
134 ;CHECK-LABEL: stack_fold_blendpd
135 ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
137 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
141 define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
142 ;CHECK-LABEL: stack_fold_blendps
143 ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
145 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
149 define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
150 ;CHECK-LABEL: stack_fold_blendvpd
151 ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
152 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
153 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
156 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
158 define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
159 ;CHECK-LABEL: stack_fold_blendvps
160 ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
161 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
162 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
165 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
167 define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
168 ;CHECK-LABEL: stack_fold_cmppd
169 ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
170 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
171 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
174 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
176 define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
177 ;CHECK-LABEL: stack_fold_cmpps
178 ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
179 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
180 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
183 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
185 define i32 @stack_fold_cmpsd(double %a0, double %a1) {
186 ;CHECK-LABEL: stack_fold_cmpsd
187 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
188 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
189 %2 = fcmp oeq double %a0, %a1
190 %3 = zext i1 %2 to i32
194 define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
195 ;CHECK-LABEL: stack_fold_cmpsd_int
196 ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
197 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
198 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
201 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
203 define i32 @stack_fold_cmpss(float %a0, float %a1) {
204 ;CHECK-LABEL: stack_fold_cmpss
205 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
206 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
207 %2 = fcmp oeq float %a0, %a1
208 %3 = zext i1 %2 to i32
212 define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
213 ;CHECK-LABEL: stack_fold_cmpss_int
214 ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
215 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
216 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
219 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
221 ; TODO stack_fold_comisd
223 define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
224 ;CHECK-LABEL: stack_fold_comisd_int
225 ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
226 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
227 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
230 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
232 ; TODO stack_fold_comiss
234 define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
235 ;CHECK-LABEL: stack_fold_comiss_int
236 ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
237 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
238 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
241 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
243 define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
244 ;CHECK-LABEL: stack_fold_cvtdq2pd
245 ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
246 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
247 %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
250 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
252 define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
253 ;CHECK-LABEL: stack_fold_cvtdq2ps
254 ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
255 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
256 %2 = sitofp <4 x i32> %a0 to <4 x float>
260 define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
261 ;CHECK-LABEL: stack_fold_cvtpd2dq
262 ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
263 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
264 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
267 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
269 define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
270 ;CHECK-LABEL: stack_fold_cvtpd2ps
271 ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
272 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
273 %2 = fptrunc <2 x double> %a0 to <2 x float>
277 define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
278 ;CHECK-LABEL: stack_fold_cvtps2dq
279 ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
280 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
281 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
284 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
286 define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
287 ;CHECK-LABEL: stack_fold_cvtps2pd
288 ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
289 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
290 %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
293 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
295 ; TODO stack_fold_cvtsd2si
297 define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
298 ;CHECK-LABEL: stack_fold_cvtsd2si_int
299 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
300 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
301 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
304 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
306 ; TODO stack_fold_cvtsd2si64
308 define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
309 ;CHECK-LABEL: stack_fold_cvtsd2si64_int
310 ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
312 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
315 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
317 define float @stack_fold_cvtsd2ss(double %a0) minsize {
318 ;CHECK-LABEL: stack_fold_cvtsd2ss
319 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
320 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
321 %2 = fptrunc double %a0 to float
325 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
326 ;CHECK-LABEL: stack_fold_cvtsd2ss_int
327 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
329 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
332 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
334 define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
335 ;CHECK-LABEL: stack_fold_cvtsi2sd
336 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
337 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
338 %2 = sitofp i32 %a0 to double
342 define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
343 ;CHECK-LABEL: stack_fold_cvtsi2sd_int
344 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
345 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
346 %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
349 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
351 define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
352 ;CHECK-LABEL: stack_fold_cvtsi642sd
353 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
354 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
355 %2 = sitofp i64 %a0 to double
359 define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
360 ;CHECK-LABEL: stack_fold_cvtsi642sd_int
361 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
362 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
363 %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
366 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
368 define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
369 ;CHECK-LABEL: stack_fold_cvtsi2ss
370 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
371 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
372 %2 = sitofp i32 %a0 to float
376 define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
377 ;CHECK-LABEL: stack_fold_cvtsi2ss_int
378 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
379 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
380 %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
383 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
385 define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
386 ;CHECK-LABEL: stack_fold_cvtsi642ss
387 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
388 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
389 %2 = sitofp i64 %a0 to float
393 define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
394 ;CHECK-LABEL: stack_fold_cvtsi642ss_int
395 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
396 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
397 %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
400 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
402 define double @stack_fold_cvtss2sd(float %a0) minsize {
403 ;CHECK-LABEL: stack_fold_cvtss2sd
404 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
406 %2 = fpext float %a0 to double
410 define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
411 ;CHECK-LABEL: stack_fold_cvtss2sd_int
412 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
413 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
414 %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
417 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
419 ; TODO stack_fold_cvtss2si
421 define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
422 ;CHECK-LABEL: stack_fold_cvtss2si_int
423 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
424 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
425 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
428 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
430 ; TODO stack_fold_cvtss2si64
432 define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
433 ;CHECK-LABEL: stack_fold_cvtss2si64_int
434 ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
435 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
436 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
439 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
441 define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
442 ;CHECK-LABEL: stack_fold_cvttpd2dq
443 ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
444 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
445 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
448 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
450 define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
451 ;CHECK-LABEL: stack_fold_cvttps2dq
452 ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
453 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
454 %2 = fptosi <4 x float> %a0 to <4 x i32>
458 define i32 @stack_fold_cvttsd2si(double %a0) {
459 ;CHECK-LABEL: stack_fold_cvttsd2si
460 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
461 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
462 %2 = fptosi double %a0 to i32
466 define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
467 ;CHECK-LABEL: stack_fold_cvttsd2si_int
468 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
469 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
470 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
473 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
475 define i64 @stack_fold_cvttsd2si64(double %a0) {
476 ;CHECK-LABEL: stack_fold_cvttsd2si64
477 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
478 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
479 %2 = fptosi double %a0 to i64
483 define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
484 ;CHECK-LABEL: stack_fold_cvttsd2si64_int
485 ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
486 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
487 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
490 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
492 define i32 @stack_fold_cvttss2si(float %a0) {
493 ;CHECK-LABEL: stack_fold_cvttss2si
494 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
495 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
496 %2 = fptosi float %a0 to i32
500 define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
501 ;CHECK-LABEL: stack_fold_cvttss2si_int
502 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
503 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
504 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
507 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
509 define i64 @stack_fold_cvttss2si64(float %a0) {
510 ;CHECK-LABEL: stack_fold_cvttss2si64
511 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
512 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
513 %2 = fptosi float %a0 to i64
517 define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
518 ;CHECK-LABEL: stack_fold_cvttss2si64_int
519 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
520 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
521 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
524 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
526 define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
527 ;CHECK-LABEL: stack_fold_divpd
528 ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
529 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
530 %2 = fdiv <2 x double> %a0, %a1
534 define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
535 ;CHECK-LABEL: stack_fold_divps
536 ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
537 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
538 %2 = fdiv <4 x float> %a0, %a1
542 define double @stack_fold_divsd(double %a0, double %a1) {
543 ;CHECK-LABEL: stack_fold_divsd
544 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
545 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
546 %2 = fdiv double %a0, %a1
550 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
551 ;CHECK-LABEL: stack_fold_divsd_int
552 ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
553 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
554 %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
557 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
559 define float @stack_fold_divss(float %a0, float %a1) {
560 ;CHECK-LABEL: stack_fold_divss
561 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
562 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
563 %2 = fdiv float %a0, %a1
567 define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
568 ;CHECK-LABEL: stack_fold_divss_int
569 ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
570 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
571 %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
574 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
576 define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
577 ;CHECK-LABEL: stack_fold_dppd
578 ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
579 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
580 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
583 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
585 define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
586 ;CHECK-LABEL: stack_fold_dpps
587 ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
588 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
589 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
592 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
594 define i32 @stack_fold_extractps(<4 x float> %a0) {
595 ;CHECK-LABEL: stack_fold_extractps
596 ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
597 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
598 %1 = extractelement <4 x float> %a0, i32 1
599 %2 = bitcast float %1 to i32
600 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
604 define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
605 ;CHECK-LABEL: stack_fold_haddpd
606 ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
607 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
608 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
611 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
613 define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
614 ;CHECK-LABEL: stack_fold_haddps
615 ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
616 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
617 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
620 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
622 define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
623 ;CHECK-LABEL: stack_fold_hsubpd
624 ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
625 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
626 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
629 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
631 define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
632 ;CHECK-LABEL: stack_fold_hsubps
633 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
634 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
635 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
638 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
640 ; TODO stack_fold_insertps
642 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
643 ;CHECK-LABEL: stack_fold_maxpd
644 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
645 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
646 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
649 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
651 define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
652 ;CHECK-LABEL: stack_fold_maxps
653 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
654 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
655 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
658 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
660 define double @stack_fold_maxsd(double %a0, double %a1) {
661 ;CHECK-LABEL: stack_fold_maxsd
662 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
663 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
664 %2 = fcmp ogt double %a0, %a1
665 %3 = select i1 %2, double %a0, double %a1
669 define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
670 ;CHECK-LABEL: stack_fold_maxsd_int
671 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
672 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
673 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
676 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
678 define float @stack_fold_maxss(float %a0, float %a1) {
679 ;CHECK-LABEL: stack_fold_maxss
680 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
681 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
682 %2 = fcmp ogt float %a0, %a1
683 %3 = select i1 %2, float %a0, float %a1
687 define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
688 ;CHECK-LABEL: stack_fold_maxss_int
689 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
690 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
691 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
694 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
696 define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
697 ;CHECK-LABEL: stack_fold_minpd
698 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
699 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
700 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
703 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
705 define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
706 ;CHECK-LABEL: stack_fold_minps
707 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
708 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
709 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
712 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
714 define double @stack_fold_minsd(double %a0, double %a1) {
715 ;CHECK-LABEL: stack_fold_minsd
716 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
717 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
718 %2 = fcmp olt double %a0, %a1
719 %3 = select i1 %2, double %a0, double %a1
723 define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
724 ;CHECK-LABEL: stack_fold_minsd_int
725 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
726 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
727 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
730 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
732 define float @stack_fold_minss(float %a0, float %a1) {
733 ;CHECK-LABEL: stack_fold_minss
734 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
735 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
736 %2 = fcmp olt float %a0, %a1
737 %3 = select i1 %2, float %a0, float %a1
741 define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
742 ;CHECK-LABEL: stack_fold_minss_int
743 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
744 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
745 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
748 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
750 define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
751 ;CHECK-LABEL: stack_fold_movddup
752 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
753 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
754 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
757 ; TODO stack_fold_movhpd (load / store)
758 ; TODO stack_fold_movhps (load / store)
760 ; TODO stack_fold_movlpd (load / store)
761 ; TODO stack_fold_movlps (load / store)
763 define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
764 ;CHECK-LABEL: stack_fold_movshdup
765 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
766 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
767 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
771 define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
772 ;CHECK-LABEL: stack_fold_movsldup
773 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
774 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
775 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
779 define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
780 ;CHECK-LABEL: stack_fold_mulpd
781 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
783 %2 = fmul <2 x double> %a0, %a1
787 define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
788 ;CHECK-LABEL: stack_fold_mulps
789 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
790 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
791 %2 = fmul <4 x float> %a0, %a1
795 define double @stack_fold_mulsd(double %a0, double %a1) {
796 ;CHECK-LABEL: stack_fold_mulsd
797 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
798 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
799 %2 = fmul double %a0, %a1
803 define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
804 ;CHECK-LABEL: stack_fold_mulsd_int
805 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
806 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
807 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
810 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
812 define float @stack_fold_mulss(float %a0, float %a1) {
813 ;CHECK-LABEL: stack_fold_mulss
814 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
815 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
816 %2 = fmul float %a0, %a1
820 define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
821 ;CHECK-LABEL: stack_fold_mulss_int
822 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
823 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
824 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
827 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
829 define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
830 ;CHECK-LABEL: stack_fold_orpd
831 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
832 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
833 %2 = bitcast <2 x double> %a0 to <2 x i64>
834 %3 = bitcast <2 x double> %a1 to <2 x i64>
835 %4 = or <2 x i64> %2, %3
836 %5 = bitcast <2 x i64> %4 to <2 x double>
837 ; fadd forces execution domain
838 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
842 define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
843 ;CHECK-LABEL: stack_fold_orps
844 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
845 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
846 %2 = bitcast <4 x float> %a0 to <2 x i64>
847 %3 = bitcast <4 x float> %a1 to <2 x i64>
848 %4 = or <2 x i64> %2, %3
849 %5 = bitcast <2 x i64> %4 to <4 x float>
850 ; fadd forces execution domain
851 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
855 ; TODO stack_fold_rcpps
857 define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
858 ;CHECK-LABEL: stack_fold_rcpps_int
859 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
860 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
861 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
864 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
866 ; TODO stack_fold_rcpss
867 ; TODO stack_fold_rcpss_int
869 define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
870 ;CHECK-LABEL: stack_fold_roundpd
871 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
872 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
873 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
876 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
878 define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
879 ;CHECK-LABEL: stack_fold_roundps
880 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
881 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
882 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
885 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
887 define double @stack_fold_roundsd(double %a0) optsize {
888 ;CHECK-LABEL: stack_fold_roundsd
889 ;CHECK: roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
890 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
891 %2 = call double @llvm.floor.f64(double %a0)
894 declare double @llvm.floor.f64(double) nounwind readnone
896 ; TODO stack_fold_roundsd_int
897 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
899 define float @stack_fold_roundss(float %a0) minsize {
900 ;CHECK-LABEL: stack_fold_roundss
901 ;CHECK: roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
902 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
903 %2 = call float @llvm.floor.f32(float %a0)
906 declare float @llvm.floor.f32(float) nounwind readnone
908 ; TODO stack_fold_roundss_int
909 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
911 ; TODO stack_fold_rsqrtps
913 define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
914 ;CHECK-LABEL: stack_fold_rsqrtps_int
915 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
916 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
917 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
920 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
922 ; TODO stack_fold_rsqrtss
923 ; TODO stack_fold_rsqrtss_int
925 define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
926 ;CHECK-LABEL: stack_fold_shufpd
927 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
928 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
929 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
933 define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
934 ;CHECK-LABEL: stack_fold_shufps
935 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
936 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
937 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
941 define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
942 ;CHECK-LABEL: stack_fold_sqrtpd
943 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
944 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
945 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
948 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
950 define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
951 ;CHECK-LABEL: stack_fold_sqrtps
952 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
953 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
954 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
957 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
959 define double @stack_fold_sqrtsd(double %a0) optsize {
960 ;CHECK-LABEL: stack_fold_sqrtsd
961 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
963 %2 = call double @llvm.sqrt.f64(double %a0)
966 declare double @llvm.sqrt.f64(double) nounwind readnone
968 ; TODO stack_fold_sqrtsd_int
969 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
971 define float @stack_fold_sqrtss(float %a0) minsize {
972 ;CHECK-LABEL: stack_fold_sqrtss
973 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
974 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
975 %2 = call float @llvm.sqrt.f32(float %a0)
978 declare float @llvm.sqrt.f32(float) nounwind readnone
980 ; TODO stack_fold_sqrtss_int
981 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
983 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
984 ;CHECK-LABEL: stack_fold_subpd
985 ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
986 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
987 %2 = fsub <2 x double> %a0, %a1
991 define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
992 ;CHECK-LABEL: stack_fold_subps
993 ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
994 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
995 %2 = fsub <4 x float> %a0, %a1
999 define double @stack_fold_subsd(double %a0, double %a1) {
1000 ;CHECK-LABEL: stack_fold_subsd
1001 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1002 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1003 %2 = fsub double %a0, %a1
1007 define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
1008 ;CHECK-LABEL: stack_fold_subsd_int
1009 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1010 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1011 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
1014 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
1016 define float @stack_fold_subss(float %a0, float %a1) {
1017 ;CHECK-LABEL: stack_fold_subss
1018 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1019 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1020 %2 = fsub float %a0, %a1
1024 define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
1025 ;CHECK-LABEL: stack_fold_subss_int
1026 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1027 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1028 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
1031 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
1033 define i32 @stack_fold_ucomisd(double %a0, double %a1) {
1034 ;CHECK-LABEL: stack_fold_ucomisd
1035 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
1036 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1037 %2 = fcmp ueq double %a0, %a1
1038 %3 = select i1 %2, i32 1, i32 -1
1042 define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
1043 ;CHECK-LABEL: stack_fold_ucomisd_int
1044 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1045 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1046 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
1049 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1051 define i32 @stack_fold_ucomiss(float %a0, float %a1) {
1052 ;CHECK-LABEL: stack_fold_ucomiss
1053 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
1054 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1055 %2 = fcmp ueq float %a0, %a1
1056 %3 = select i1 %2, i32 1, i32 -1
1060 define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
1061 ;CHECK-LABEL: stack_fold_ucomiss_int
1062 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1063 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1064 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
1067 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
1069 define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
1070 ;CHECK-LABEL: stack_fold_unpckhpd
1071 ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1072 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1073 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
1074 ; fadd forces execution domain
1075 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
1079 define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
1080 ;CHECK-LABEL: stack_fold_unpckhps
1081 ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1082 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1083 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1084 ; fadd forces execution domain
1085 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
1089 define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
1090 ;CHECK-LABEL: stack_fold_unpcklpd
1091 ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1092 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1093 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
1094 ; fadd forces execution domain
1095 %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
1099 define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
1100 ;CHECK-LABEL: stack_fold_unpcklps
1101 ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1102 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1103 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1104 ; fadd forces execution domain
1105 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
1109 define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
1110 ;CHECK-LABEL: stack_fold_xorpd
1111 ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1112 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1113 %2 = bitcast <2 x double> %a0 to <2 x i64>
1114 %3 = bitcast <2 x double> %a1 to <2 x i64>
1115 %4 = xor <2 x i64> %2, %3
1116 %5 = bitcast <2 x i64> %4 to <2 x double>
1117 ; fadd forces execution domain
1118 %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
1122 define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
1123 ;CHECK-LABEL: stack_fold_xorps
1124 ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
1125 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
1126 %2 = bitcast <4 x float> %a0 to <2 x i64>
1127 %3 = bitcast <4 x float> %a1 to <2 x i64>
1128 %4 = xor <2 x i64> %2, %3
1129 %5 = bitcast <2 x i64> %4 to <4 x float>
1130 ; fadd forces execution domain
1131 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>