; Lit test for basic MSA vector operations. llc is invoked once for big-endian
; (-march=mips) and once for little-endian (-march=mipsel), both with
; +msa,+fp64; each invocation enables one shared check prefix plus one
; endian-specific check prefix.
1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-BE %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32-LE %s
; Zero-initialised global vectors used as load/store targets by the tests.
4 @v4i8 = global <4 x i8> <i8 0, i8 0, i8 0, i8 0>
5 @v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
6 @v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
7 @v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
8 @v2i64 = global <2 x i64> <i64 0, i64 0>
; Constant <16 x i8> materialisation. Each volatile store is followed by the
; instruction sequence expected to produce its constant operand.
12 define void @const_v16i8() nounwind {
13 ; ALL-LABEL: const_v16i8:
; Every byte is 0: expect a byte-splat immediate.
15 store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
16 ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
; Every byte is 1: expect a byte-splat immediate.
18 store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8
19 ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
; Last byte differs from the others: expect a vector load through a
; %lo-relocated address instead of an immediate.
21 store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
22 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
23 ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
; No short repeating pattern: expect a vector load.
25 store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
26 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
27 ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Pattern repeats every two bytes: expect a halfword splat whose immediate
; depends on byte order (0x0100 = 256 big-endian, 0x0001 = 1 little-endian).
29 store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8
30 ; MIPS32-BE: ldi.h [[R1:\$w[0-9]+]], 256
31 ; MIPS32-LE: ldi.h [[R1:\$w[0-9]+]], 1
; Pattern repeats every four bytes: expect the word to be built with lui/ori
; (0x01020304 big-endian, 0x04030201 little-endian) and splatted with fill.w.
33 store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8
34 ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 258
35 ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 1027
36 ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 772
37 ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 513
38 ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
; Pattern repeats every eight bytes: expect a vector load.
40 store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
41 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
42 ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Constant <8 x i16> materialisation. Each volatile store is followed by the
; instruction sequence expected to produce its constant operand.
47 define void @const_v8i16() nounwind {
48 ; ALL-LABEL: const_v8i16:
; All-zero vector: expected as a byte splat of 0.
50 store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
51 ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
; Halfword splat of 1.
53 store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16
54 ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
; Not a splat: expect a vector load through a %lo-relocated address.
56 store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
57 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
58 ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
; 1028 = 0x0404, i.e. every byte is 4: expected as a byte splat.
60 store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
61 ; ALL: ldi.b [[R1:\$w[0-9]+]], 4
; Pattern repeats every two halfwords: expect the word to be built with
; lui/ori (halves swapped between byte orders) and splatted with fill.w.
63 store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16
64 ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 1
65 ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 2
66 ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 2
67 ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 1
68 ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
; Pattern repeats every four halfwords: expect a vector load.
70 store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
71 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
72 ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Constant <4 x i32> materialisation. Each volatile store is followed by the
; instruction sequence expected to produce its constant operand.
77 define void @const_v4i32() nounwind {
78 ; ALL-LABEL: const_v4i32:
; All-zero vector: expected as a byte splat of 0.
80 store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
81 ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
; Word splat of 1.
83 store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32
84 ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
; Not a splat: expect a vector load through a %lo-relocated address.
86 store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
87 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
88 ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; 16843009 = 0x01010101, i.e. every byte is 1: expected as a byte splat.
90 store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
91 ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
; 65537 = 0x00010001, i.e. every halfword is 1: expected as a halfword splat.
93 store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32
94 ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
; Pattern repeats every two words: expect a vector load.
96 store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
97 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
98 ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Four distinct elements: expect a vector load.
100 store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
101 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
102 ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Constant <2 x i64> materialisation. Each volatile store is followed by the
; instruction sequence expected to produce its constant operand.
107 define void @const_v2i64() nounwind {
108 ; ALL-LABEL: const_v2i64:
; All-zero vector: expected as a byte splat of 0.
110 store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
111 ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
; 72340172838076673 = 0x0101010101010101: expected as a byte splat of 1.
113 store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64
114 ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
; 281479271743489 = 0x0001000100010001: expected as a halfword splat of 1.
116 store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64
117 ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
; 4294967297 = 0x0000000100000001: expected as a word splat of 1.
119 store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64
120 ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
; Doubleword splat of 1.
122 store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64
123 ; ALL: ldi.d [[R1:\$w[0-9]+]], 1
; Not a splat: expect a vector load (checked as a word-format load).
125 store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
126 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
127 ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Two distinct elements: expect a vector load (checked as a word-format load).
129 store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
130 ; ALL: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
131 ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
; Builds a <16 x i8> from scalar arguments with a chain of insertelement and
; stores it to @v16i8. Arguments %a..%g fill elements 0..6 and %h fills
; elements 7..15.
136 define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d, i8 signext %e, i8 signext %f, i8 signext %g, i8 signext %h) nounwind {
137 ; ALL-LABEL: nonconst_v16i8:
139 %1 = insertelement <16 x i8> undef, i8 %a, i32 0
140 %2 = insertelement <16 x i8> %1, i8 %b, i32 1
141 %3 = insertelement <16 x i8> %2, i8 %c, i32 2
142 %4 = insertelement <16 x i8> %3, i8 %d, i32 3
143 %5 = insertelement <16 x i8> %4, i8 %e, i32 4
144 %6 = insertelement <16 x i8> %5, i8 %f, i32 5
145 %7 = insertelement <16 x i8> %6, i8 %g, i32 6
146 %8 = insertelement <16 x i8> %7, i8 %h, i32 7
147 %9 = insertelement <16 x i8> %8, i8 %h, i32 8
148 %10 = insertelement <16 x i8> %9, i8 %h, i32 9
149 %11 = insertelement <16 x i8> %10, i8 %h, i32 10
150 %12 = insertelement <16 x i8> %11, i8 %h, i32 11
151 %13 = insertelement <16 x i8> %12, i8 %h, i32 12
152 %14 = insertelement <16 x i8> %13, i8 %h, i32 13
153 %15 = insertelement <16 x i8> %14, i8 %h, i32 14
154 %16 = insertelement <16 x i8> %15, i8 %h, i32 15
; Expected: one insert.b per element into a single vector register. The first
; four values come from $4-$7; the other four arguments are loaded from the
; stack at 16, 20, 24 and 28($sp), and the last loaded value is reused for
; elements 7..15.
155 ; ALL-DAG: insert.b [[R1:\$w[0-9]+]][0], $4
156 ; ALL-DAG: insert.b [[R1]][1], $5
157 ; ALL-DAG: insert.b [[R1]][2], $6
158 ; ALL-DAG: insert.b [[R1]][3], $7
159 ; ALL-DAG: lw [[R2:\$[0-9]+]], 16($sp)
160 ; ALL-DAG: insert.b [[R1]][4], [[R2]]
161 ; ALL-DAG: lw [[R3:\$[0-9]+]], 20($sp)
162 ; ALL-DAG: insert.b [[R1]][5], [[R3]]
163 ; ALL-DAG: lw [[R4:\$[0-9]+]], 24($sp)
164 ; ALL-DAG: insert.b [[R1]][6], [[R4]]
165 ; ALL-DAG: lw [[R5:\$[0-9]+]], 28($sp)
166 ; ALL-DAG: insert.b [[R1]][7], [[R5]]
167 ; ALL-DAG: insert.b [[R1]][8], [[R5]]
168 ; ALL-DAG: insert.b [[R1]][9], [[R5]]
169 ; ALL-DAG: insert.b [[R1]][10], [[R5]]
170 ; ALL-DAG: insert.b [[R1]][11], [[R5]]
171 ; ALL-DAG: insert.b [[R1]][12], [[R5]]
172 ; ALL-DAG: insert.b [[R1]][13], [[R5]]
173 ; ALL-DAG: insert.b [[R1]][14], [[R5]]
174 ; ALL-DAG: insert.b [[R1]][15], [[R5]]
176 store volatile <16 x i8> %16, <16 x i8>*@v16i8
; Builds a <8 x i16> from eight scalar arguments with a chain of insertelement
; and stores it to @v8i16.
181 define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16 signext %d, i16 signext %e, i16 signext %f, i16 signext %g, i16 signext %h) nounwind {
182 ; ALL-LABEL: nonconst_v8i16:
184 %1 = insertelement <8 x i16> undef, i16 %a, i32 0
185 %2 = insertelement <8 x i16> %1, i16 %b, i32 1
186 %3 = insertelement <8 x i16> %2, i16 %c, i32 2
187 %4 = insertelement <8 x i16> %3, i16 %d, i32 3
188 %5 = insertelement <8 x i16> %4, i16 %e, i32 4
189 %6 = insertelement <8 x i16> %5, i16 %f, i32 5
190 %7 = insertelement <8 x i16> %6, i16 %g, i32 6
191 %8 = insertelement <8 x i16> %7, i16 %h, i32 7
; Expected: one insert.h per element into a single vector register. The first
; four values come from $4-$7; the rest are loaded from the stack at 16, 20,
; 24 and 28($sp). The pattern variable R2 is re-captured by every lw check.
192 ; ALL-DAG: insert.h [[R1:\$w[0-9]+]][0], $4
193 ; ALL-DAG: insert.h [[R1]][1], $5
194 ; ALL-DAG: insert.h [[R1]][2], $6
195 ; ALL-DAG: insert.h [[R1]][3], $7
196 ; ALL-DAG: lw [[R2:\$[0-9]+]], 16($sp)
197 ; ALL-DAG: insert.h [[R1]][4], [[R2]]
198 ; ALL-DAG: lw [[R2:\$[0-9]+]], 20($sp)
199 ; ALL-DAG: insert.h [[R1]][5], [[R2]]
200 ; ALL-DAG: lw [[R2:\$[0-9]+]], 24($sp)
201 ; ALL-DAG: insert.h [[R1]][6], [[R2]]
202 ; ALL-DAG: lw [[R2:\$[0-9]+]], 28($sp)
203 ; ALL-DAG: insert.h [[R1]][7], [[R2]]
205 store volatile <8 x i16> %8, <8 x i16>*@v8i16
; Builds a <4 x i32> from four scalar arguments with a chain of insertelement
; and stores it to @v4i32.
210 define void @nonconst_v4i32(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) nounwind {
211 ; ALL-LABEL: nonconst_v4i32:
213 %1 = insertelement <4 x i32> undef, i32 %a, i32 0
214 %2 = insertelement <4 x i32> %1, i32 %b, i32 1
215 %3 = insertelement <4 x i32> %2, i32 %c, i32 2
216 %4 = insertelement <4 x i32> %3, i32 %d, i32 3
; Expected: four insert.w into one vector register, in element order, taking
; the values from $4-$7.
217 ; ALL: insert.w [[R1:\$w[0-9]+]][0], $4
218 ; ALL: insert.w [[R1]][1], $5
219 ; ALL: insert.w [[R1]][2], $6
220 ; ALL: insert.w [[R1]][3], $7
222 store volatile <4 x i32> %4, <4 x i32>*@v4i32
; Builds a <2 x i64> from two i64 arguments with insertelement and stores it
; to @v2i64.
227 define void @nonconst_v2i64(i64 signext %a, i64 signext %b) nounwind {
228 ; ALL-LABEL: nonconst_v2i64:
230 %1 = insertelement <2 x i64> undef, i64 %a, i32 0
231 %2 = insertelement <2 x i64> %1, i64 %b, i32 1
; Expected: four 32-bit inserts (word lanes 0..3) from $4-$7, i.e. two
; insert.w per 64-bit element.
232 ; ALL: insert.w [[R1:\$w[0-9]+]][0], $4
233 ; ALL: insert.w [[R1]][1], $5
234 ; ALL: insert.w [[R1]][2], $6
235 ; ALL: insert.w [[R1]][3], $7
237 store volatile <2 x i64> %2, <2 x i64>*@v2i64
; Loads @v16i8, adds the vector to itself, extracts element 1 and sign-extends
; it to i32. Expected: ld.b, addv.b, then a signed element copy.
242 define i32 @extract_sext_v16i8() nounwind {
243 ; ALL-LABEL: extract_sext_v16i8:
245 %1 = load <16 x i8>, <16 x i8>* @v16i8
246 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
248 %2 = add <16 x i8> %1, %1
249 ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
251 %3 = extractelement <16 x i8> %2, i32 1
252 %4 = sext i8 %3 to i32
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.b, not the register written by the load.
253 ; ALL-DAG: copy_s.b [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v8i16, adds the vector to itself, extracts element 1 and sign-extends
; it to i32. Expected: ld.h, addv.h, then a signed element copy.
260 define i32 @extract_sext_v8i16() nounwind {
261 ; ALL-LABEL: extract_sext_v8i16:
263 %1 = load <8 x i16>, <8 x i16>* @v8i16
264 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
266 %2 = add <8 x i16> %1, %1
267 ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
269 %3 = extractelement <8 x i16> %2, i32 1
270 %4 = sext i16 %3 to i32
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.h, not the register written by the load.
271 ; ALL-DAG: copy_s.h [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v4i32, adds the vector to itself and extracts element 1 as an i32.
; Expected: ld.w, addv.w, then a signed element copy.
278 define i32 @extract_sext_v4i32() nounwind {
279 ; ALL-LABEL: extract_sext_v4i32:
281 %1 = load <4 x i32>, <4 x i32>* @v4i32
282 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
284 %2 = add <4 x i32> %1, %1
285 ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
287 %3 = extractelement <4 x i32> %2, i32 1
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.w, not the register written by the load.
288 ; ALL-DAG: copy_s.w [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v2i64, adds the vector to itself and extracts element 1 as an i64.
; Expected: ld.d, addv.d, then two 32-bit copies from word lanes 2 and 3.
293 define i64 @extract_sext_v2i64() nounwind {
294 ; ALL-LABEL: extract_sext_v2i64:
296 %1 = load <2 x i64>, <2 x i64>* @v2i64
297 ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
299 %2 = add <2 x i64> %1, %1
300 ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
302 %3 = extractelement <2 x i64> %2, i32 1
; NOTE(review): the copies name the load register R1 although the IR extracts
; from the add result %2; presumably this relies on the add being done in
; place - confirm against actual output.
303 ; ALL-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2]
304 ; ALL-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3]
; Loads @v16i8, adds the vector to itself, extracts element 1 and zero-extends
; it to i32. Expected: ld.b, addv.b, then an unsigned element copy.
311 define i32 @extract_zext_v16i8() nounwind {
312 ; ALL-LABEL: extract_zext_v16i8:
314 %1 = load <16 x i8>, <16 x i8>* @v16i8
315 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
317 %2 = add <16 x i8> %1, %1
318 ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
320 %3 = extractelement <16 x i8> %2, i32 1
321 %4 = zext i8 %3 to i32
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.b, not the register written by the load.
322 ; ALL-DAG: copy_u.b [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v8i16, adds the vector to itself, extracts element 1 and zero-extends
; it to i32. Expected: ld.h, addv.h, then an unsigned element copy.
328 define i32 @extract_zext_v8i16() nounwind {
329 ; ALL-LABEL: extract_zext_v8i16:
331 %1 = load <8 x i16>, <8 x i16>* @v8i16
332 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
334 %2 = add <8 x i16> %1, %1
335 ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
337 %3 = extractelement <8 x i16> %2, i32 1
338 %4 = zext i16 %3 to i32
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.h, not the register written by the load.
339 ; ALL-DAG: copy_u.h [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v4i32, adds the vector to itself and extracts element 1 as an i32.
; Expected: ld.w, addv.w, then an element copy of either signedness.
345 define i32 @extract_zext_v4i32() nounwind {
346 ; ALL-LABEL: extract_zext_v4i32:
348 %1 = load <4 x i32>, <4 x i32>* @v4i32
349 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
351 %2 = add <4 x i32> %1, %1
352 ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
354 %3 = extractelement <4 x i32> %2, i32 1
; The element is taken from the add result (%2), so the copy must read the
; register written by addv.w, not the register written by the load.
355 ; ALL-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R2]][1]
; Loads @v2i64, adds the vector to itself and extracts element 1 as an i64.
; Expected: ld.d, addv.d, then two 32-bit copies (either signedness) from word
; lanes 2 and 3.
360 define i64 @extract_zext_v2i64() nounwind {
361 ; ALL-LABEL: extract_zext_v2i64:
363 %1 = load <2 x i64>, <2 x i64>* @v2i64
364 ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
366 %2 = add <2 x i64> %1, %1
367 ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
369 %3 = extractelement <2 x i64> %2, i32 1
; NOTE(review): the copies name the load register R1 although the IR extracts
; from the add result %2; presumably this relies on the add being done in
; place - confirm against actual output.
370 ; ALL-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2]
371 ; ALL-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3]
; Variable-index extract with sign extension: loads @v16i8, adds the vector to
; itself, and extracts the element selected by the i32 loaded from @i32.
377 define i32 @extract_sext_v16i8_vidx() nounwind {
378 ; ALL-LABEL: extract_sext_v16i8_vidx:
380 %1 = load <16 x i8>, <16 x i8>* @v16i8
381 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
382 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
384 %2 = add <16 x i8> %1, %1
385 ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
387 %3 = load i32, i32* @i32
388 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
389 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
391 %4 = extractelement <16 x i8> %2, i32 %3
392 %5 = sext i8 %4 to i32
; Expected: splat the indexed element of the add result (hence R2), move the
; same-numbered FP register to a GPR, then arithmetic-shift right by 24.
393 ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
394 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
395 ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
; Variable-index extract with sign extension: loads @v8i16, adds the vector to
; itself, and extracts the element selected by the i32 loaded from @i32.
400 define i32 @extract_sext_v8i16_vidx() nounwind {
401 ; ALL-LABEL: extract_sext_v8i16_vidx:
403 %1 = load <8 x i16>, <8 x i16>* @v8i16
404 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
405 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
407 %2 = add <8 x i16> %1, %1
408 ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
410 %3 = load i32, i32* @i32
411 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
412 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
414 %4 = extractelement <8 x i16> %2, i32 %3
415 %5 = sext i16 %4 to i32
; Expected: splat the indexed element of the add result (hence R2), move the
; same-numbered FP register to a GPR, then arithmetic-shift right by 16.
416 ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
417 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
418 ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
; Variable-index extract: loads @v4i32, adds the vector to itself, and
; extracts the element selected by the i32 loaded from @i32.
423 define i32 @extract_sext_v4i32_vidx() nounwind {
424 ; ALL-LABEL: extract_sext_v4i32_vidx:
426 %1 = load <4 x i32>, <4 x i32>* @v4i32
427 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
428 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
430 %2 = add <4 x i32> %1, %1
431 ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
433 %3 = load i32, i32* @i32
434 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
435 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
437 %4 = extractelement <4 x i32> %2, i32 %3
; Expected: splat the indexed element of the add result (hence R2), then move
; the same-numbered FP register to a GPR.
438 ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
439 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
; Variable-index extract of an i64: loads @v2i64, adds the vector to itself,
; and extracts the element selected by the i32 loaded from @i32.
445 define i64 @extract_sext_v2i64_vidx() nounwind {
446 ; ALL-LABEL: extract_sext_v2i64_vidx:
448 %1 = load <2 x i64>, <2 x i64>* @v2i64
449 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
450 ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
452 %2 = add <2 x i64> %1, %1
453 ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
455 %3 = load i32, i32* @i32
456 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
457 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
459 %4 = extractelement <2 x i64> %2, i32 %3
; Expected: two word splat + mfc1 pairs, one per 32-bit half of the result.
; NOTE(review): both splat patterns name R1 (the load) and the same IDX even
; though the IR extracts from %2 and the halves are distinct words; the
; patterns look looser than the intended sequence - confirm against output.
460 ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
461 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
462 ; ALL-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
463 ; ALL-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
; Variable-index extract with zero extension: loads @v16i8, adds the vector to
; itself, and extracts the element selected by the i32 loaded from @i32.
469 define i32 @extract_zext_v16i8_vidx() nounwind {
470 ; ALL-LABEL: extract_zext_v16i8_vidx:
472 %1 = load <16 x i8>, <16 x i8>* @v16i8
473 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
474 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
476 %2 = add <16 x i8> %1, %1
477 ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
479 %3 = load i32, i32* @i32
480 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
481 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
483 %4 = extractelement <16 x i8> %2, i32 %3
484 %5 = zext i8 %4 to i32
; Expected: splat the indexed element of the add result (hence R2), move the
; same-numbered FP register to a GPR, then logical-shift right by 24.
485 ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
486 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
487 ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
; Variable-index extract with zero extension: loads @v8i16, adds the vector to
; itself, and extracts the element selected by the i32 loaded from @i32.
492 define i32 @extract_zext_v8i16_vidx() nounwind {
493 ; ALL-LABEL: extract_zext_v8i16_vidx:
495 %1 = load <8 x i16>, <8 x i16>* @v8i16
496 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
497 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
499 %2 = add <8 x i16> %1, %1
500 ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
502 %3 = load i32, i32* @i32
503 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
504 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
506 %4 = extractelement <8 x i16> %2, i32 %3
507 %5 = zext i16 %4 to i32
; Expected: splat the indexed element of the add result (hence R2), move the
; same-numbered FP register to a GPR, then logical-shift right by 16.
508 ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
509 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
510 ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
; Variable-index extract: loads @v4i32, adds the vector to itself, and
; extracts the element selected by the i32 loaded from @i32.
515 define i32 @extract_zext_v4i32_vidx() nounwind {
516 ; ALL-LABEL: extract_zext_v4i32_vidx:
518 %1 = load <4 x i32>, <4 x i32>* @v4i32
519 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
520 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
522 %2 = add <4 x i32> %1, %1
523 ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
525 %3 = load i32, i32* @i32
526 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
527 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
529 %4 = extractelement <4 x i32> %2, i32 %3
; Expected: splat the indexed element of the add result (hence R2), then move
; the same-numbered FP register to a GPR.
530 ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R2]]{{\[}}[[IDX]]]
531 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
; Variable-index extract of an i64: loads @v2i64, adds the vector to itself,
; and extracts the element selected by the i32 loaded from @i32.
537 define i64 @extract_zext_v2i64_vidx() nounwind {
538 ; ALL-LABEL: extract_zext_v2i64_vidx:
540 %1 = load <2 x i64>, <2 x i64>* @v2i64
541 ; ALL-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
542 ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
544 %2 = add <2 x i64> %1, %1
545 ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
547 %3 = load i32, i32* @i32
548 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
549 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
551 %4 = extractelement <2 x i64> %2, i32 %3
; Expected: two word splat + mfc1 pairs, one per 32-bit half of the result.
; NOTE(review): both splat patterns name R1 (the load) and the same IDX even
; though the IR extracts from %2 and the halves are distinct words; the
; patterns look looser than the intended sequence - confirm against output.
552 ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
553 ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
554 ; ALL-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
555 ; ALL-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
; Constant-index insert: loads @v16i8, replaces element 1 with a byte derived
; from %a, and stores the vector back.
561 define void @insert_v16i8(i32 signext %a) nounwind {
562 ; ALL-LABEL: insert_v16i8:
564 %1 = load <16 x i8>, <16 x i8>* @v16i8
565 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
; %a goes through a trunc/sext/trunc chain before the insert; the check below
; still expects the argument register $4 to be inserted directly.
567 %a2 = trunc i32 %a to i8
568 %a3 = sext i8 %a2 to i32
569 %a4 = trunc i32 %a3 to i8
573 %2 = insertelement <16 x i8> %1, i8 %a4, i32 1
574 ; ALL-DAG: insert.b [[R1]][1], $4
576 store <16 x i8> %2, <16 x i8>* @v16i8
577 ; ALL-DAG: st.b [[R1]]
; Constant-index insert: loads @v8i16, replaces element 1 with a halfword
; derived from %a, and stores the vector back.
582 define void @insert_v8i16(i32 signext %a) nounwind {
583 ; ALL-LABEL: insert_v8i16:
585 %1 = load <8 x i16>, <8 x i16>* @v8i16
586 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
; %a goes through a trunc/sext/trunc chain before the insert; the check below
; still expects the argument register $4 to be inserted directly.
588 %a2 = trunc i32 %a to i16
589 %a3 = sext i16 %a2 to i32
590 %a4 = trunc i32 %a3 to i16
594 %2 = insertelement <8 x i16> %1, i16 %a4, i32 1
595 ; ALL-DAG: insert.h [[R1]][1], $4
597 store <8 x i16> %2, <8 x i16>* @v8i16
598 ; ALL-DAG: st.h [[R1]]
; Constant-index insert: loads @v4i32, replaces element 1 with %a, and stores
; the vector back. Expected: ld.w, insert.w of $4 into lane 1, st.w.
603 define void @insert_v4i32(i32 signext %a) nounwind {
604 ; ALL-LABEL: insert_v4i32:
606 %1 = load <4 x i32>, <4 x i32>* @v4i32
607 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
612 %2 = insertelement <4 x i32> %1, i32 %a, i32 1
613 ; ALL-DAG: insert.w [[R1]][1], $4
615 store <4 x i32> %2, <4 x i32>* @v4i32
616 ; ALL-DAG: st.w [[R1]]
; Constant-index insert of an i64: loads @v2i64, replaces element 1 with %a,
; and stores the vector back. The checks use word-format load/store and expect
; the 64-bit insert as two insert.w into word lanes 2 and 3 from $4 and $5.
621 define void @insert_v2i64(i64 signext %a) nounwind {
622 ; ALL-LABEL: insert_v2i64:
624 %1 = load <2 x i64>, <2 x i64>* @v2i64
625 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
630 %2 = insertelement <2 x i64> %1, i64 %a, i32 1
631 ; ALL-DAG: insert.w [[R1]][2], $4
632 ; ALL-DAG: insert.w [[R1]][3], $5
634 store <2 x i64> %2, <2 x i64>* @v2i64
635 ; ALL-DAG: st.w [[R1]]
; Variable-index insert: loads @v16i8, replaces the element selected by the
; i32 loaded from @i32 with a byte derived from %a, and stores the vector back.
640 define void @insert_v16i8_vidx(i32 signext %a) nounwind {
641 ; ALL-LABEL: insert_v16i8_vidx:
643 %1 = load <16 x i8>, <16 x i8>* @v16i8
644 ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
646 %2 = load i32, i32* @i32
647 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
648 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
650 %a2 = trunc i32 %a to i8
651 %a3 = sext i8 %a2 to i32
652 %a4 = trunc i32 %a3 to i8
656 %3 = insertelement <16 x i8> %1, i8 %a4, i32 %2
; Expected: sld.b by the index, insert $4 into lane 0, then sld.b by the
; negated index.
657 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
658 ; ALL-DAG: insert.b [[R1]][0], $4
659 ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
660 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
662 store <16 x i8> %3, <16 x i8>* @v16i8
663 ; ALL-DAG: st.b [[R1]]
; Variable-index insert: loads @v8i16, replaces the element selected by the
; i32 loaded from @i32 with a halfword derived from %a, and stores the vector
; back. %a carries signext like the other insert tests in this file.
668 define void @insert_v8i16_vidx(i32 signext %a) nounwind {
669 ; ALL-LABEL: insert_v8i16_vidx:
671 %1 = load <8 x i16>, <8 x i16>* @v8i16
672 ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
674 %2 = load i32, i32* @i32
675 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
676 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
678 %a2 = trunc i32 %a to i16
679 %a3 = sext i16 %a2 to i32
680 %a4 = trunc i32 %a3 to i16
684 %3 = insertelement <8 x i16> %1, i16 %a4, i32 %2
; Expected: scale the element index to a byte offset (shift left by 1), sld.b
; by that offset, insert $4 into lane 0, then sld.b by the negated offset.
685 ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
686 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
687 ; ALL-DAG: insert.h [[R1]][0], $4
688 ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
689 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
691 store <8 x i16> %3, <8 x i16>* @v8i16
692 ; ALL-DAG: st.h [[R1]]
; Variable-index insert: loads @v4i32, replaces the element selected by the
; i32 loaded from @i32 with %a, and stores the vector back.
697 define void @insert_v4i32_vidx(i32 signext %a) nounwind {
698 ; ALL-LABEL: insert_v4i32_vidx:
700 %1 = load <4 x i32>, <4 x i32>* @v4i32
701 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
703 %2 = load i32, i32* @i32
704 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
705 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
710 %3 = insertelement <4 x i32> %1, i32 %a, i32 %2
; Expected: scale the element index to a byte offset (shift left by 2), sld.b
; by that offset, insert $4 into lane 0, then sld.b by the negated offset.
711 ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
712 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
713 ; ALL-DAG: insert.w [[R1]][0], $4
714 ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
715 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
717 store <4 x i32> %3, <4 x i32>* @v4i32
718 ; ALL-DAG: st.w [[R1]]
; Variable-index insert of an i64: loads @v2i64, replaces the element selected
; by the i32 loaded from @i32 with %a, and stores the vector back. The checks
; expect two complete slide/insert.w/slide-back sequences, one for $4 and one
; for $5, the second using the index plus one.
723 define void @insert_v2i64_vidx(i64 signext %a) nounwind {
724 ; ALL-LABEL: insert_v2i64_vidx:
726 %1 = load <2 x i64>, <2 x i64>* @v2i64
727 ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
729 %2 = load i32, i32* @i32
730 ; ALL-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
731 ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
736 %3 = insertelement <2 x i64> %1, i64 %a, i32 %2
737 ; TODO: This code could be a lot better but it works. The legalizer splits
738 ; 64-bit inserts into two 32-bit inserts because there is no i64 type on
739 ; MIPS32. The obvious optimisation is to perform both insert.w's at once while
740 ; the vector is rotated.
; NOTE(review): the first sll is written against IDX directly, without an
; index doubling step to convert the i64 element index into a word index;
; the patterns may be looser than the real sequence - confirm against output.
741 ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
742 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
743 ; ALL-DAG: insert.w [[R1]][0], $4
744 ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
745 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
746 ; ALL-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
747 ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
748 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
749 ; ALL-DAG: insert.w [[R1]][0], $5
750 ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
751 ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
753 store <2 x i64> %3, <2 x i64>* @v2i64
754 ; ALL-DAG: st.w [[R1]]
; Stores an all-ones <4 x i8> constant to @v4i8. Only the function label is
; checked; no instruction patterns are attached yet (see the TODO).
759 define void @truncstore() nounwind {
760 ; ALL-LABEL: truncstore:
762 store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8
763 ; TODO: What code should be emitted?