1 ; RUN: llc -march=mipsel -mattr=+msa < %s | FileCheck -check-prefix=LITENDIAN %s
2 ; RUN: llc -march=mips -mattr=+msa < %s | FileCheck -check-prefix=BIGENDIAN %s
4 define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
6 %0 = load volatile <16 x i8>* %src
7 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
8 %2 = bitcast <16 x i8> %1 to <16 x i8>
9 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
10 store <16 x i8> %3, <16 x i8>* %dst
14 ; LITENDIAN: v16i8_to_v16i8:
15 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
16 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
17 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
18 ; LITENDIAN: st.b [[R3]],
19 ; LITENDIAN: .size v16i8_to_v16i8
21 ; BIGENDIAN: v16i8_to_v16i8:
22 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
23 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
24 ; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
25 ; BIGENDIAN: st.b [[R3]],
26 ; BIGENDIAN: .size v16i8_to_v16i8
28 define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
30 %0 = load volatile <16 x i8>* %src
31 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
32 %2 = bitcast <16 x i8> %1 to <8 x i16>
33 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
34 store <8 x i16> %3, <8 x i16>* %dst
38 ; LITENDIAN: v16i8_to_v8i16:
39 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
40 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
41 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
42 ; LITENDIAN: st.h [[R3]],
43 ; LITENDIAN: .size v16i8_to_v8i16
45 ; BIGENDIAN: v16i8_to_v8i16:
46 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
47 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
48 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
49 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
50 ; BIGENDIAN: st.h [[R4]],
51 ; BIGENDIAN: .size v16i8_to_v8i16
53 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
54 ; are no operations for v8f16 to put in the way.
55 define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
57 %0 = load volatile <16 x i8>* %src
58 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
59 %2 = bitcast <16 x i8> %1 to <8 x half>
60 store <8 x half> %2, <8 x half>* %dst
64 ; LITENDIAN: v16i8_to_v8f16:
65 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
66 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
67 ; LITENDIAN: st.b [[R2]],
68 ; LITENDIAN: .size v16i8_to_v8f16
70 ; BIGENDIAN: v16i8_to_v8f16:
71 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
72 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
73 ; BIGENDIAN: st.b [[R2]],
74 ; BIGENDIAN: .size v16i8_to_v8f16
76 define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
78 %0 = load volatile <16 x i8>* %src
79 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
80 %2 = bitcast <16 x i8> %1 to <4 x i32>
81 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
82 store <4 x i32> %3, <4 x i32>* %dst
86 ; LITENDIAN: v16i8_to_v4i32:
87 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
88 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
89 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
90 ; LITENDIAN: st.w [[R3]],
91 ; LITENDIAN: .size v16i8_to_v4i32
93 ; BIGENDIAN: v16i8_to_v4i32:
94 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
95 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
96 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
97 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
98 ; BIGENDIAN: st.w [[R4]],
99 ; BIGENDIAN: .size v16i8_to_v4i32
101 define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
103 %0 = load volatile <16 x i8>* %src
104 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
105 %2 = bitcast <16 x i8> %1 to <4 x float>
106 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
107 store <4 x float> %3, <4 x float>* %dst
111 ; LITENDIAN: v16i8_to_v4f32:
112 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
113 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
114 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
115 ; LITENDIAN: st.w [[R3]],
116 ; LITENDIAN: .size v16i8_to_v4f32
118 ; BIGENDIAN: v16i8_to_v4f32:
119 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
120 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
121 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
122 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
123 ; BIGENDIAN: st.w [[R4]],
124 ; BIGENDIAN: .size v16i8_to_v4f32
126 define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
128 %0 = load volatile <16 x i8>* %src
129 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
130 %2 = bitcast <16 x i8> %1 to <2 x i64>
131 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
132 store <2 x i64> %3, <2 x i64>* %dst
136 ; LITENDIAN: v16i8_to_v2i64:
137 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
138 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
139 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
140 ; LITENDIAN: st.d [[R3]],
141 ; LITENDIAN: .size v16i8_to_v2i64
143 ; BIGENDIAN: v16i8_to_v2i64:
144 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
145 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
146 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
147 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
148 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
149 ; BIGENDIAN: st.d [[R4]],
150 ; BIGENDIAN: .size v16i8_to_v2i64
152 define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
154 %0 = load volatile <16 x i8>* %src
155 %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
156 %2 = bitcast <16 x i8> %1 to <2 x double>
157 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
158 store <2 x double> %3, <2 x double>* %dst
162 ; LITENDIAN: v16i8_to_v2f64:
163 ; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
164 ; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
165 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
166 ; LITENDIAN: st.d [[R3]],
167 ; LITENDIAN: .size v16i8_to_v2f64
169 ; BIGENDIAN: v16i8_to_v2f64:
170 ; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
171 ; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
172 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
173 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
174 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
175 ; BIGENDIAN: st.d [[R4]],
176 ; BIGENDIAN: .size v16i8_to_v2f64
178 define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
180 %0 = load volatile <8 x i16>* %src
181 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
182 %2 = bitcast <8 x i16> %1 to <16 x i8>
183 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
184 store <16 x i8> %3, <16 x i8>* %dst
188 ; LITENDIAN: v8i16_to_v16i8:
189 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
190 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
191 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
192 ; LITENDIAN: st.b [[R3]],
193 ; LITENDIAN: .size v8i16_to_v16i8
195 ; BIGENDIAN: v8i16_to_v16i8:
196 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
197 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
198 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
199 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
200 ; BIGENDIAN: st.b [[R4]],
201 ; BIGENDIAN: .size v8i16_to_v16i8
203 define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
205 %0 = load volatile <8 x i16>* %src
206 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
207 %2 = bitcast <8 x i16> %1 to <8 x i16>
208 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
209 store <8 x i16> %3, <8 x i16>* %dst
213 ; LITENDIAN: v8i16_to_v8i16:
214 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
215 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
216 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
217 ; LITENDIAN: st.h [[R3]],
218 ; LITENDIAN: .size v8i16_to_v8i16
220 ; BIGENDIAN: v8i16_to_v8i16:
221 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
222 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
223 ; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
224 ; BIGENDIAN: st.h [[R3]],
225 ; BIGENDIAN: .size v8i16_to_v8i16
227 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
228 ; are no operations for v8f16 to put in the way.
229 define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
231 %0 = load volatile <8 x i16>* %src
232 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
233 %2 = bitcast <8 x i16> %1 to <8 x half>
234 store <8 x half> %2, <8 x half>* %dst
238 ; LITENDIAN: v8i16_to_v8f16:
239 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
240 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
241 ; LITENDIAN: st.h [[R2]],
242 ; LITENDIAN: .size v8i16_to_v8f16
244 ; BIGENDIAN: v8i16_to_v8f16:
245 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
246 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
247 ; BIGENDIAN: st.h [[R2]],
248 ; BIGENDIAN: .size v8i16_to_v8f16
250 define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
252 %0 = load volatile <8 x i16>* %src
253 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
254 %2 = bitcast <8 x i16> %1 to <4 x i32>
255 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
256 store <4 x i32> %3, <4 x i32>* %dst
260 ; LITENDIAN: v8i16_to_v4i32:
261 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
262 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
263 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
264 ; LITENDIAN: st.w [[R3]],
265 ; LITENDIAN: .size v8i16_to_v4i32
267 ; BIGENDIAN: v8i16_to_v4i32:
268 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
269 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
270 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
271 ; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
272 ; BIGENDIAN: st.w [[R4]],
273 ; BIGENDIAN: .size v8i16_to_v4i32
275 define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
277 %0 = load volatile <8 x i16>* %src
278 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
279 %2 = bitcast <8 x i16> %1 to <4 x float>
280 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
281 store <4 x float> %3, <4 x float>* %dst
285 ; LITENDIAN: v8i16_to_v4f32:
286 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
287 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
288 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
289 ; LITENDIAN: st.w [[R3]],
290 ; LITENDIAN: .size v8i16_to_v4f32
292 ; BIGENDIAN: v8i16_to_v4f32:
293 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
294 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
295 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
296 ; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
297 ; BIGENDIAN: st.w [[R4]],
298 ; BIGENDIAN: .size v8i16_to_v4f32
300 define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
302 %0 = load volatile <8 x i16>* %src
303 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
304 %2 = bitcast <8 x i16> %1 to <2 x i64>
305 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
306 store <2 x i64> %3, <2 x i64>* %dst
310 ; LITENDIAN: v8i16_to_v2i64:
311 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
312 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
313 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
314 ; LITENDIAN: st.d [[R3]],
315 ; LITENDIAN: .size v8i16_to_v2i64
317 ; BIGENDIAN: v8i16_to_v2i64:
318 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
319 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
320 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
321 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
322 ; BIGENDIAN: st.d [[R4]],
323 ; BIGENDIAN: .size v8i16_to_v2i64
325 define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
327 %0 = load volatile <8 x i16>* %src
328 %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
329 %2 = bitcast <8 x i16> %1 to <2 x double>
330 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
331 store <2 x double> %3, <2 x double>* %dst
335 ; LITENDIAN: v8i16_to_v2f64:
336 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
337 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
338 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
339 ; LITENDIAN: st.d [[R3]],
340 ; LITENDIAN: .size v8i16_to_v2f64
342 ; BIGENDIAN: v8i16_to_v2f64:
343 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
344 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
345 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
346 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
347 ; BIGENDIAN: st.d [[R4]],
348 ; BIGENDIAN: .size v8i16_to_v2f64
351 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
352 ; are no operations for v8f16 to put in the way.
353 define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
355 %0 = load volatile <8 x half>* %src
356 %1 = bitcast <8 x half> %0 to <16 x i8>
357 %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
358 store <16 x i8> %2, <16 x i8>* %dst
362 ; LITENDIAN: v8f16_to_v16i8:
363 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
364 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
365 ; LITENDIAN: st.b [[R3]],
366 ; LITENDIAN: .size v8f16_to_v16i8
368 ; BIGENDIAN: v8f16_to_v16i8:
369 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
370 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
371 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
372 ; BIGENDIAN: st.b [[R4]],
373 ; BIGENDIAN: .size v8f16_to_v16i8
375 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
376 ; are no operations for v8f16 to put in the way.
377 define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
379 %0 = load volatile <8 x half>* %src
380 %1 = bitcast <8 x half> %0 to <8 x i16>
381 %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
382 store <8 x i16> %2, <8 x i16>* %dst
386 ; LITENDIAN: v8f16_to_v8i16:
387 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
388 ; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
389 ; LITENDIAN: st.h [[R2]],
390 ; LITENDIAN: .size v8f16_to_v8i16
392 ; BIGENDIAN: v8f16_to_v8i16:
393 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
394 ; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
395 ; BIGENDIAN: st.h [[R2]],
396 ; BIGENDIAN: .size v8f16_to_v8i16
398 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
399 ; are no operations for v8f16 to put in the way.
400 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
401 ; are no operations for v8f16 to put in the way.
402 define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
404 %0 = load volatile <8 x half>* %src
405 %1 = bitcast <8 x half> %0 to <8 x half>
406 store <8 x half> %1, <8 x half>* %dst
410 ; LITENDIAN: v8f16_to_v8f16:
411 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
412 ; LITENDIAN: st.h [[R1]],
413 ; LITENDIAN: .size v8f16_to_v8f16
415 ; BIGENDIAN: v8f16_to_v8f16:
416 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
417 ; BIGENDIAN: st.h [[R1]],
418 ; BIGENDIAN: .size v8f16_to_v8f16
420 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
421 ; are no operations for v8f16 to put in the way.
422 define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
424 %0 = load volatile <8 x half>* %src
425 %1 = bitcast <8 x half> %0 to <4 x i32>
426 %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
427 store <4 x i32> %2, <4 x i32>* %dst
431 ; LITENDIAN: v8f16_to_v4i32:
432 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
433 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
434 ; LITENDIAN: st.w [[R2]],
435 ; LITENDIAN: .size v8f16_to_v4i32
437 ; BIGENDIAN: v8f16_to_v4i32:
438 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
439 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
440 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
441 ; BIGENDIAN: st.w [[R3]],
442 ; BIGENDIAN: .size v8f16_to_v4i32
444 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
445 ; are no operations for v8f16 to put in the way.
446 define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
448 %0 = load volatile <8 x half>* %src
449 %1 = bitcast <8 x half> %0 to <4 x float>
450 %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
451 store <4 x float> %2, <4 x float>* %dst
455 ; LITENDIAN: v8f16_to_v4f32:
456 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
457 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
458 ; LITENDIAN: st.w [[R2]],
459 ; LITENDIAN: .size v8f16_to_v4f32
461 ; BIGENDIAN: v8f16_to_v4f32:
462 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
463 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
464 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
465 ; BIGENDIAN: st.w [[R3]],
466 ; BIGENDIAN: .size v8f16_to_v4f32
468 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
469 ; are no operations for v8f16 to put in the way.
470 define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
472 %0 = load volatile <8 x half>* %src
473 %1 = bitcast <8 x half> %0 to <2 x i64>
474 %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
475 store <2 x i64> %2, <2 x i64>* %dst
479 ; LITENDIAN: v8f16_to_v2i64:
480 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
481 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
482 ; LITENDIAN: st.d [[R2]],
483 ; LITENDIAN: .size v8f16_to_v2i64
485 ; BIGENDIAN: v8f16_to_v2i64:
486 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
487 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
488 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
489 ; BIGENDIAN: st.d [[R3]],
490 ; BIGENDIAN: .size v8f16_to_v2i64
492 ; We can't prevent the (bitcast (load X)) DAG Combine here because there
493 ; are no operations for v8f16 to put in the way.
494 define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
496 %0 = load volatile <8 x half>* %src
497 %1 = bitcast <8 x half> %0 to <2 x double>
498 %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
499 store <2 x double> %2, <2 x double>* %dst
503 ; LITENDIAN: v8f16_to_v2f64:
504 ; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
505 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
506 ; LITENDIAN: st.d [[R2]],
507 ; LITENDIAN: .size v8f16_to_v2f64
509 ; BIGENDIAN: v8f16_to_v2f64:
510 ; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
511 ; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
512 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
513 ; BIGENDIAN: st.d [[R3]],
514 ; BIGENDIAN: .size v8f16_to_v2f64
517 define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
519 %0 = load volatile <4 x i32>* %src
520 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
521 %2 = bitcast <4 x i32> %1 to <16 x i8>
522 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
523 store <16 x i8> %3, <16 x i8>* %dst
527 ; LITENDIAN: v4i32_to_v16i8:
528 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
529 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
530 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
531 ; LITENDIAN: st.b [[R3]],
532 ; LITENDIAN: .size v4i32_to_v16i8
534 ; BIGENDIAN: v4i32_to_v16i8:
535 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
536 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
537 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
538 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
539 ; BIGENDIAN: st.b [[R4]],
540 ; BIGENDIAN: .size v4i32_to_v16i8
542 define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
544 %0 = load volatile <4 x i32>* %src
545 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
546 %2 = bitcast <4 x i32> %1 to <8 x i16>
547 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
548 store <8 x i16> %3, <8 x i16>* %dst
552 ; LITENDIAN: v4i32_to_v8i16:
553 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
554 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
555 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
556 ; LITENDIAN: st.h [[R3]],
557 ; LITENDIAN: .size v4i32_to_v8i16
559 ; BIGENDIAN: v4i32_to_v8i16:
560 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
561 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
562 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
563 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
564 ; BIGENDIAN: st.h [[R4]],
565 ; BIGENDIAN: .size v4i32_to_v8i16
567 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
568 ; are no operations for v8f16 to put in the way.
569 define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
571 %0 = load volatile <4 x i32>* %src
572 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
573 %2 = bitcast <4 x i32> %1 to <8 x half>
574 store <8 x half> %2, <8 x half>* %dst
578 ; LITENDIAN: v4i32_to_v8f16:
579 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
580 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
581 ; LITENDIAN: st.w [[R2]],
582 ; LITENDIAN: .size v4i32_to_v8f16
584 ; BIGENDIAN: v4i32_to_v8f16:
585 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
586 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
587 ; BIGENDIAN: st.w [[R2]],
588 ; BIGENDIAN: .size v4i32_to_v8f16
590 define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
592 %0 = load volatile <4 x i32>* %src
593 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
594 %2 = bitcast <4 x i32> %1 to <4 x i32>
595 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
596 store <4 x i32> %3, <4 x i32>* %dst
600 ; LITENDIAN: v4i32_to_v4i32:
601 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
602 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
603 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
604 ; LITENDIAN: st.w [[R3]],
605 ; LITENDIAN: .size v4i32_to_v4i32
607 ; BIGENDIAN: v4i32_to_v4i32:
608 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
609 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
610 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
611 ; BIGENDIAN: st.w [[R3]],
612 ; BIGENDIAN: .size v4i32_to_v4i32
614 define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
616 %0 = load volatile <4 x i32>* %src
617 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
618 %2 = bitcast <4 x i32> %1 to <4 x float>
619 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
620 store <4 x float> %3, <4 x float>* %dst
624 ; LITENDIAN: v4i32_to_v4f32:
625 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
626 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
627 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
628 ; LITENDIAN: st.w [[R3]],
629 ; LITENDIAN: .size v4i32_to_v4f32
631 ; BIGENDIAN: v4i32_to_v4f32:
632 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
633 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
634 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
635 ; BIGENDIAN: st.w [[R3]],
636 ; BIGENDIAN: .size v4i32_to_v4f32
638 define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
640 %0 = load volatile <4 x i32>* %src
641 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
642 %2 = bitcast <4 x i32> %1 to <2 x i64>
643 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
644 store <2 x i64> %3, <2 x i64>* %dst
648 ; LITENDIAN: v4i32_to_v2i64:
649 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
650 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
651 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
652 ; LITENDIAN: st.d [[R3]],
653 ; LITENDIAN: .size v4i32_to_v2i64
655 ; BIGENDIAN: v4i32_to_v2i64:
656 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
657 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
658 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
659 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
660 ; BIGENDIAN: st.d [[R4]],
661 ; BIGENDIAN: .size v4i32_to_v2i64
663 define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
665 %0 = load volatile <4 x i32>* %src
666 %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
667 %2 = bitcast <4 x i32> %1 to <2 x double>
668 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
669 store <2 x double> %3, <2 x double>* %dst
673 ; LITENDIAN: v4i32_to_v2f64:
674 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
675 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
676 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
677 ; LITENDIAN: st.d [[R3]],
678 ; LITENDIAN: .size v4i32_to_v2f64
680 ; BIGENDIAN: v4i32_to_v2f64:
681 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
682 ; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
683 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
684 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
685 ; BIGENDIAN: st.d [[R4]],
686 ; BIGENDIAN: .size v4i32_to_v2f64
688 define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
690 %0 = load volatile <4 x float>* %src
691 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
692 %2 = bitcast <4 x float> %1 to <16 x i8>
693 %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
694 store <16 x i8> %3, <16 x i8>* %dst
698 ; LITENDIAN: v4f32_to_v16i8:
699 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
700 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
701 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
702 ; LITENDIAN: st.b [[R3]],
703 ; LITENDIAN: .size v4f32_to_v16i8
705 ; BIGENDIAN: v4f32_to_v16i8:
706 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
707 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
708 ; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
709 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
710 ; BIGENDIAN: st.b [[R4]],
711 ; BIGENDIAN: .size v4f32_to_v16i8
713 define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
715 %0 = load volatile <4 x float>* %src
716 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
717 %2 = bitcast <4 x float> %1 to <8 x i16>
718 %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
719 store <8 x i16> %3, <8 x i16>* %dst
723 ; LITENDIAN: v4f32_to_v8i16:
724 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
725 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
726 ; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
727 ; LITENDIAN: st.h [[R3]],
728 ; LITENDIAN: .size v4f32_to_v8i16
730 ; BIGENDIAN: v4f32_to_v8i16:
731 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
732 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
733 ; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
734 ; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
735 ; BIGENDIAN: st.h [[R4]],
736 ; BIGENDIAN: .size v4f32_to_v8i16
738 ; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
739 ; are no operations for v8f16 to put in the way.
740 define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
742 %0 = load volatile <4 x float>* %src
743 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
744 %2 = bitcast <4 x float> %1 to <8 x half>
745 store <8 x half> %2, <8 x half>* %dst
749 ; LITENDIAN: v4f32_to_v8f16:
750 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
751 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
752 ; LITENDIAN: st.w [[R2]],
753 ; LITENDIAN: .size v4f32_to_v8f16
755 ; BIGENDIAN: v4f32_to_v8f16:
756 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
757 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
758 ; BIGENDIAN: st.w [[R2]],
759 ; BIGENDIAN: .size v4f32_to_v8f16
761 define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
763 %0 = load volatile <4 x float>* %src
764 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
765 %2 = bitcast <4 x float> %1 to <4 x i32>
766 %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
767 store <4 x i32> %3, <4 x i32>* %dst
771 ; LITENDIAN: v4f32_to_v4i32:
772 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
773 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
774 ; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
775 ; LITENDIAN: st.w [[R3]],
776 ; LITENDIAN: .size v4f32_to_v4i32
778 ; BIGENDIAN: v4f32_to_v4i32:
779 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
780 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
781 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
782 ; BIGENDIAN: st.w [[R3]],
783 ; BIGENDIAN: .size v4f32_to_v4i32
785 define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
787 %0 = load volatile <4 x float>* %src
788 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
789 %2 = bitcast <4 x float> %1 to <4 x float>
790 %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
791 store <4 x float> %3, <4 x float>* %dst
795 ; LITENDIAN: v4f32_to_v4f32:
796 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
797 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
798 ; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
799 ; LITENDIAN: st.w [[R3]],
800 ; LITENDIAN: .size v4f32_to_v4f32
802 ; BIGENDIAN: v4f32_to_v4f32:
803 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
804 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
805 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
806 ; BIGENDIAN: st.w [[R3]],
807 ; BIGENDIAN: .size v4f32_to_v4f32
809 define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
811 %0 = load volatile <4 x float>* %src
812 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
813 %2 = bitcast <4 x float> %1 to <2 x i64>
814 %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
815 store <2 x i64> %3, <2 x i64>* %dst
819 ; LITENDIAN: v4f32_to_v2i64:
820 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
821 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
822 ; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
823 ; LITENDIAN: st.d [[R3]],
824 ; LITENDIAN: .size v4f32_to_v2i64
826 ; BIGENDIAN: v4f32_to_v2i64:
827 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
828 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
829 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
830 ; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
831 ; BIGENDIAN: st.d [[R4]],
832 ; BIGENDIAN: .size v4f32_to_v2i64
834 define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
836 %0 = load volatile <4 x float>* %src
837 %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
838 %2 = bitcast <4 x float> %1 to <2 x double>
839 %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
840 store <2 x double> %3, <2 x double>* %dst
844 ; LITENDIAN: v4f32_to_v2f64:
845 ; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
846 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
847 ; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
848 ; LITENDIAN: st.d [[R3]],
849 ; LITENDIAN: .size v4f32_to_v2f64
851 ; BIGENDIAN: v4f32_to_v2f64:
852 ; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
853 ; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
854 ; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
855 ; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
856 ; BIGENDIAN: st.d [[R4]],
857 ; BIGENDIAN: .size v4f32_to_v2f64
; Bitcast <2 x i64> to <16 x i8>; the addv.d/addv.b pair pins the vector type
; on each side of the cast (big endian needs a byte and word reorder between).
define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2i64_to_v16i8

; BIGENDIAN: v2i64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2i64_to_v16i8
; Bitcast <2 x i64> to <8 x i16>; the addv.d/addv.h pair pins the vector type
; on each side of the cast (big endian needs a halfword reorder in between).
define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2i64_to_v8i16

; BIGENDIAN: v2i64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2i64_to_v8i16
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2i64_to_v8f16

; BIGENDIAN: v2i64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2i64_to_v8f16
; Bitcast <2 x i64> to <4 x i32>; the addv.d/addv.w pair pins the vector type
; on each side of the cast (big endian needs a word swap in between).
define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4i32

; BIGENDIAN: v2i64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4i32
; Bitcast <2 x i64> to <4 x float>; the addv.d/fadd.w pair pins the vector
; type on each side of the cast (big endian needs a word swap in between).
define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2i64_to_v4f32

; BIGENDIAN: v2i64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2i64_to_v4f32
; Identity bitcast <2 x i64> to <2 x i64>; no reordering is expected on
; either endian, so the two addv.d operations appear back to back.
define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2i64

; BIGENDIAN: v2i64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2i64
; Bitcast <2 x i64> to <2 x double>; element size is unchanged, so no
; reordering is expected on either endian.
define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x i64>* %src
  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2i64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2i64_to_v2f64

; BIGENDIAN: v2i64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2i64_to_v2f64
; Bitcast <2 x double> to <16 x i8>; the fadd.d/addv.b pair pins the vector
; type on each side of the cast (big endian needs a byte and word reorder).
define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
  store <16 x i8> %3, <16 x i8>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v16i8:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.b [[R3]],
; LITENDIAN: .size v2f64_to_v16i8

; BIGENDIAN: v2f64_to_v16i8:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R3]], 177
; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.b [[R4]],
; BIGENDIAN: .size v2f64_to_v16i8
; Bitcast <2 x double> to <8 x i16>; the fadd.d/addv.h pair pins the vector
; type on each side of the cast (big endian needs a halfword reorder).
define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
  store <8 x i16> %3, <8 x i16>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8i16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.h [[R3]],
; LITENDIAN: .size v2f64_to_v8i16

; BIGENDIAN: v2f64_to_v8i16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.h [[R4]],
; BIGENDIAN: .size v2f64_to_v8i16
; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
; are no operations for v8f16 to put in the way.
define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <8 x half>
  store <8 x half> %2, <8 x half>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v8f16:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: st.d [[R2]],
; LITENDIAN: .size v2f64_to_v8f16

; BIGENDIAN: v2f64_to_v8f16:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: st.d [[R2]],
; BIGENDIAN: .size v2f64_to_v8f16
; Bitcast <2 x double> to <4 x i32>; the fadd.d/addv.w pair pins the vector
; type on each side of the cast (big endian needs a word swap in between).
define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
  store <4 x i32> %3, <4 x i32>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4i32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4i32

; BIGENDIAN: v2f64_to_v4i32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4i32
; Bitcast <2 x double> to <4 x float>; the fadd.d/fadd.w pair pins the vector
; type on each side of the cast (big endian needs a word swap in between).
define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
  store <4 x float> %3, <4 x float>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v4f32:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.w [[R3]],
; LITENDIAN: .size v2f64_to_v4f32

; BIGENDIAN: v2f64_to_v4f32:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
; BIGENDIAN: st.w [[R4]],
; BIGENDIAN: .size v2f64_to_v4f32
; Bitcast <2 x double> to <2 x i64>; element size is unchanged, so no
; reordering is expected on either endian.
define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
  store <2 x i64> %3, <2 x i64>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2i64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2i64

; BIGENDIAN: v2f64_to_v2i64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2i64
; Identity bitcast <2 x double> to <2 x double>; no reordering is expected on
; either endian, so the two fadd.d operations appear back to back.
define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
entry:
  %0 = load volatile <2 x double>* %src
  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
  %2 = bitcast <2 x double> %1 to <2 x double>
  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
  store <2 x double> %3, <2 x double>* %dst
  ret void
}

; LITENDIAN: v2f64_to_v2f64:
; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; LITENDIAN: st.d [[R3]],
; LITENDIAN: .size v2f64_to_v2f64

; BIGENDIAN: v2f64_to_v2f64:
; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
; BIGENDIAN: st.d [[R3]],
; BIGENDIAN: .size v2f64_to_v2f64
; MSA intrinsic declarations used above to fix the operand/result vector types.
declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind