regmap: rbtree: Fixed node range check on sync
[firefly-linux-kernel-4.4.55.git] / arch / powerpc / lib / copyuser_power7.S
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15  *
16  * Copyright (C) IBM Corporation, 2011
17  *
18  * Author: Anton Blanchard <anton@au.ibm.com>
19  */
20 #include <asm/ppc_asm.h>
21
/*
 * err1: written directly in front of a load or store on the same source
 * line.  It defines local label 100 at that instruction and emits the pair
 * (address of 100, address of .Ldo_err1) as two 64-bit values into the
 * __ex_table section, then switches back to the previous section.
 * Used for accesses made while no extra stack frame is active.
 */
22         .macro err1
23 100:
24         .section __ex_table,"a"
25         .align 3
26         .llong 100b,.Ldo_err1
27         .previous
28         .endm
29
/*
 * err2: written directly in front of a load or store on the same source
 * line.  It defines local label 200 at that instruction and emits the pair
 * (address of 200, address of .Ldo_err2) as two 64-bit values into the
 * __ex_table section, then switches back to the previous section.
 * Used inside the 128-byte integer loop, where r14-r22 have been saved in
 * a stack frame that the .Ldo_err2 fixup restores and pops.
 */
30         .macro err2
31 200:
32         .section __ex_table,"a"
33         .align 3
34         .llong 200b,.Ldo_err2
35         .previous
36         .endm
37
38 #ifdef CONFIG_ALTIVEC
/*
 * err3: written directly in front of a load or store on the same source
 * line.  It defines local label 300 at that instruction and emits the pair
 * (address of 300, address of .Ldo_err3) as two 64-bit values into the
 * __ex_table section, then switches back to the previous section.
 * Used on the VMX paths outside the main loop, where the fixup only has
 * to call .exit_vmx_usercopy and drop the stack frame.
 */
39         .macro err3
40 300:
41         .section __ex_table,"a"
42         .align 3
43         .llong 300b,.Ldo_err3
44         .previous
45         .endm
46
/*
 * err4: written directly in front of a load or store on the same source
 * line.  It defines local label 400 at that instruction and emits the pair
 * (address of 400, address of .Ldo_err4) as two 64-bit values into the
 * __ex_table section, then switches back to the previous section.
 * Used inside the VMX main loops, where r14-r16 hold index offsets and
 * their saved values must be reloaded by the .Ldo_err4 fixup.
 */
47         .macro err4
48 400:
49         .section __ex_table,"a"
50         .align 3
51         .llong 400b,.Ldo_err4
52         .previous
53         .endm
54
55
/*
 * Fixup targets for accesses tagged err4 and err3 (VMX paths).
 *
 * .Ldo_err4 reloads r14-r16 from the current stack frame and falls
 * through to .Ldo_err3.  .Ldo_err3 calls .exit_vmx_usercopy, reloads the
 * link register from the slot at STACKFRAMESIZE+16(r1) (the bl has just
 * overwritten LR) and continues at .Lexit, which pops the frame and hands
 * the original arguments to __copy_tofrom_user_base.
 */
56 .Ldo_err4:
57         ld      r16,STK_REG(R16)(r1)
58         ld      r15,STK_REG(R15)(r1)
59         ld      r14,STK_REG(R14)(r1)
60 .Ldo_err3:
61         bl      .exit_vmx_usercopy
62         ld      r0,STACKFRAMESIZE+16(r1)
63         mtlr    r0
64         b       .Lexit
65 #endif /* CONFIG_ALTIVEC */
66
/*
 * Fixup targets for accesses tagged err2 and err1, and the common tail
 * for the VMX fixups.
 *
 * .Ldo_err2: reached from the 128-byte integer loop; reloads r14-r22
 *            from the stack frame, then falls through.
 * .Lexit:    pops the STACKFRAMESIZE frame, then falls through.
 * .Ldo_err1: reloads r3, r4 and r5 from 48/56/64(r1), the slots written
 *            at function entry, and branches to __copy_tofrom_user_base
 *            with those original argument values.
 */
67 .Ldo_err2:
68         ld      r22,STK_REG(R22)(r1)
69         ld      r21,STK_REG(R21)(r1)
70         ld      r20,STK_REG(R20)(r1)
71         ld      r19,STK_REG(R19)(r1)
72         ld      r18,STK_REG(R18)(r1)
73         ld      r17,STK_REG(R17)(r1)
74         ld      r16,STK_REG(R16)(r1)
75         ld      r15,STK_REG(R15)(r1)
76         ld      r14,STK_REG(R14)(r1)
77 .Lexit:
78         addi    r1,r1,STACKFRAMESIZE
79 .Ldo_err1:
80         ld      r3,48(r1)
81         ld      r4,56(r1)
82         ld      r5,64(r1)
83         b       __copy_tofrom_user_base
84
85
/*
 * __copy_tofrom_user_power7
 *
 * Register use as seen in this file: r3 = destination (every store goes
 * through r3), r4 = source (every load goes through r4), r5 = byte count.
 * The successful exits visible here either set r3 to 0 and blr, or tail
 * call .exit_vmx_usercopy.
 *
 * Entry: stash the three arguments at 48/56/64(r1) so the .Ldo_err*
 * fixups can reload them, then pick a strategy by length:
 *   r5 < 16                          -> .Lshort_copy
 *   r5 > 4096 (CONFIG_ALTIVEC only)  -> .Lvmx_copy
 *   otherwise                        -> fall through to .Lnonvmx_copy
 * The compares are issued before the stores and the branches after them.
 * The stores do not modify the condition register.
 */
86 _GLOBAL(__copy_tofrom_user_power7)
87 #ifdef CONFIG_ALTIVEC
88         cmpldi  r5,16
89         cmpldi  cr1,r5,4096
90
91         std     r3,48(r1)
92         std     r4,56(r1)
93         std     r5,64(r1)
94
95         blt     .Lshort_copy
96         bgt     cr1,.Lvmx_copy
97 #else
98         cmpldi  r5,16
99
100         std     r3,48(r1)
101         std     r4,56(r1)
102         std     r5,64(r1)
103
104         blt     .Lshort_copy
105 #endif
106
/*
 * .Lnonvmx_copy: integer-register copy.  Reached by fall-through from the
 * entry code for lengths of 16 bytes or more that did not go to the VMX
 * path, and from .Lunwind_stack_nonvmx_copy.
 *
 * Steps: align the source to 8 bytes, copy 128-byte blocks in a counted
 * loop (only if at least 128 bytes remain), copy 64/32/16-byte pieces,
 * then fall through into .Lshort_copy with at most 15 bytes left.
 *
 * Accesses outside the loop are tagged err1 (no frame active).  Accesses
 * inside the loop are tagged err2 because r14-r22 are saved in a frame.
 */
107 .Lnonvmx_copy:
108         /* Get the source 8B aligned */
109         neg     r6,r4
110         mtocrf  0x01,r6         /* cr7 = low 4 bits of -source */
111         clrldi  r6,r6,(64-3)    /* r6 = bytes needed to reach 8B alignment */
112
113         bf      cr7*4+3,1f      /* bit worth 1: copy one byte */
114 err1;   lbz     r0,0(r4)
115         addi    r4,r4,1
116 err1;   stb     r0,0(r3)
117         addi    r3,r3,1
118
119 1:      bf      cr7*4+2,2f      /* bit worth 2: copy a halfword */
120 err1;   lhz     r0,0(r4)
121         addi    r4,r4,2
122 err1;   sth     r0,0(r3)
123         addi    r3,r3,2
124
125 2:      bf      cr7*4+1,3f      /* bit worth 4: copy a word */
126 err1;   lwz     r0,0(r4)
127         addi    r4,r4,4
128 err1;   stw     r0,0(r3)
129         addi    r3,r3,4
130
131 3:      sub     r5,r5,r6        /* r5 = bytes left after the alignment copies */
132         cmpldi  r5,128
133         blt     5f              /* fewer than 128 bytes: skip the block loop */
134
        /*
         * Push a frame and save r14-r22, plus LR at STACKFRAMESIZE+16(r1).
         * The loop below uses r14-r21 as data registers.  r22 is saved and
         * restored as well although the loop does not touch it.
         */
135         mflr    r0
136         stdu    r1,-STACKFRAMESIZE(r1)
137         std     r14,STK_REG(R14)(r1)
138         std     r15,STK_REG(R15)(r1)
139         std     r16,STK_REG(R16)(r1)
140         std     r17,STK_REG(R17)(r1)
141         std     r18,STK_REG(R18)(r1)
142         std     r19,STK_REG(R19)(r1)
143         std     r20,STK_REG(R20)(r1)
144         std     r21,STK_REG(R21)(r1)
145         std     r22,STK_REG(R22)(r1)
146         std     r0,STACKFRAMESIZE+16(r1)
147
148         srdi    r6,r5,7         /* number of whole 128-byte blocks */
149         mtctr   r6
150
151         /* Now do cacheline (128B) sized loads and stores. */
152         .align  5
153 4:
154 err2;   ld      r0,0(r4)
155 err2;   ld      r6,8(r4)
156 err2;   ld      r7,16(r4)
157 err2;   ld      r8,24(r4)
158 err2;   ld      r9,32(r4)
159 err2;   ld      r10,40(r4)
160 err2;   ld      r11,48(r4)
161 err2;   ld      r12,56(r4)
162 err2;   ld      r14,64(r4)
163 err2;   ld      r15,72(r4)
164 err2;   ld      r16,80(r4)
165 err2;   ld      r17,88(r4)
166 err2;   ld      r18,96(r4)
167 err2;   ld      r19,104(r4)
168 err2;   ld      r20,112(r4)
169 err2;   ld      r21,120(r4)
170         addi    r4,r4,128
171 err2;   std     r0,0(r3)
172 err2;   std     r6,8(r3)
173 err2;   std     r7,16(r3)
174 err2;   std     r8,24(r3)
175 err2;   std     r9,32(r3)
176 err2;   std     r10,40(r3)
177 err2;   std     r11,48(r3)
178 err2;   std     r12,56(r3)
179 err2;   std     r14,64(r3)
180 err2;   std     r15,72(r3)
181 err2;   std     r16,80(r3)
182 err2;   std     r17,88(r3)
183 err2;   std     r18,96(r3)
184 err2;   std     r19,104(r3)
185 err2;   std     r20,112(r3)
186 err2;   std     r21,120(r3)
187         addi    r3,r3,128
188         bdnz    4b
189
190         clrldi  r5,r5,(64-7)    /* r5 = bytes left, now below 128 */
191
192         ld      r14,STK_REG(R14)(r1)
193         ld      r15,STK_REG(R15)(r1)
194         ld      r16,STK_REG(R16)(r1)
195         ld      r17,STK_REG(R17)(r1)
196         ld      r18,STK_REG(R18)(r1)
197         ld      r19,STK_REG(R19)(r1)
198         ld      r20,STK_REG(R20)(r1)
199         ld      r21,STK_REG(R21)(r1)
200         ld      r22,STK_REG(R22)(r1)
201         addi    r1,r1,STACKFRAMESIZE
202
203         /* Up to 127B to go */
204 5:      srdi    r6,r5,4         /* r6 = remaining length in 16-byte units */
205         mtocrf  0x01,r6         /* cr7 bits select the 64, 32 and 16 byte steps */
206
207 6:      bf      cr7*4+1,7f      /* 64-byte step */
208 err1;   ld      r0,0(r4)
209 err1;   ld      r6,8(r4)
210 err1;   ld      r7,16(r4)
211 err1;   ld      r8,24(r4)
212 err1;   ld      r9,32(r4)
213 err1;   ld      r10,40(r4)
214 err1;   ld      r11,48(r4)
215 err1;   ld      r12,56(r4)
216         addi    r4,r4,64
217 err1;   std     r0,0(r3)
218 err1;   std     r6,8(r3)
219 err1;   std     r7,16(r3)
220 err1;   std     r8,24(r3)
221 err1;   std     r9,32(r3)
222 err1;   std     r10,40(r3)
223 err1;   std     r11,48(r3)
224 err1;   std     r12,56(r3)
225         addi    r3,r3,64
226
227         /* Up to 63B to go */
228 7:      bf      cr7*4+2,8f      /* 32-byte step */
229 err1;   ld      r0,0(r4)
230 err1;   ld      r6,8(r4)
231 err1;   ld      r7,16(r4)
232 err1;   ld      r8,24(r4)
233         addi    r4,r4,32
234 err1;   std     r0,0(r3)
235 err1;   std     r6,8(r3)
236 err1;   std     r7,16(r3)
237 err1;   std     r8,24(r3)
238         addi    r3,r3,32
239
240         /* Up to 31B to go */
241 8:      bf      cr7*4+3,9f      /* 16-byte step */
242 err1;   ld      r0,0(r4)
243 err1;   ld      r6,8(r4)
244         addi    r4,r4,16
245 err1;   std     r0,0(r3)
246 err1;   std     r6,8(r3)
247         addi    r3,r3,16
248
249 9:      clrldi  r5,r5,(64-4)    /* r5 = bytes left, now below 16 */
250
251         /* Up to 15B to go */
/*
 * .Lshort_copy: copy the final r5 bytes, where only the low 4 bits of r5
 * are examined.  Those bits are moved into cr7 and each one selects a
 * fixed-size step of 8, 4, 2 and 1 bytes in that order.  Reached directly
 * from the entry code for lengths below 16 and by fall-through from
 * .Lnonvmx_copy.  Finishes by setting r3 to 0 and returning with blr.
 * All accesses are tagged err1.
 */
252 .Lshort_copy:
253         mtocrf  0x01,r5
254         bf      cr7*4+0,12f     /* bit worth 8 */
255 err1;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
256 err1;   lwz     r6,4(r4)
257         addi    r4,r4,8
258 err1;   stw     r0,0(r3)
259 err1;   stw     r6,4(r3)
260         addi    r3,r3,8
261
262 12:     bf      cr7*4+1,13f     /* bit worth 4 */
263 err1;   lwz     r0,0(r4)
264         addi    r4,r4,4
265 err1;   stw     r0,0(r3)
266         addi    r3,r3,4
267
268 13:     bf      cr7*4+2,14f     /* bit worth 2 */
269 err1;   lhz     r0,0(r4)
270         addi    r4,r4,2
271 err1;   sth     r0,0(r3)
272         addi    r3,r3,2
273
274 14:     bf      cr7*4+3,15f     /* bit worth 1, pointers are not advanced after it */
275 err1;   lbz     r0,0(r4)
276 err1;   stb     r0,0(r3)
277
278 15:     li      r3,0
279         blr
280
/*
 * .Lunwind_stack_nonvmx_copy: branched to from .Lvmx_copy when the value
 * returned by .enter_vmx_usercopy compared equal to 0.  Pops the frame
 * that .Lvmx_copy pushed and continues with the integer copy.  At that
 * point r3, r4 and r5 have already been reloaded with the original
 * arguments and LR has been restored by .Lvmx_copy.
 */
281 .Lunwind_stack_nonvmx_copy:
282         addi    r1,r1,STACKFRAMESIZE
283         b       .Lnonvmx_copy
284
285 #ifdef CONFIG_ALTIVEC
/*
 * .Lvmx_copy: path taken when CONFIG_ALTIVEC is set and the length is
 * above 4096 bytes.  Pushes a stack frame, calls .enter_vmx_usercopy,
 * issues the prefetch stream setup for source and destination, and then:
 *   - if .enter_vmx_usercopy returned 0, drops the frame and uses
 *     .Lnonvmx_copy instead (presumably VMX is not usable then, the
 *     callee is not visible here),
 *   - if source and destination differ in their low 4 address bits,
 *     continues at .Lvmx_unaligned_copy,
 *   - otherwise copies with lvx/stvx below.
 *
 * Accesses are tagged err3 where the fixup only has to call
 * .exit_vmx_usercopy and drop the frame, and err4 inside the main loop
 * where r14-r16 have been repurposed and must also be reloaded.
 *
 * In lvx, stvx, dcbt and dcbtst an r0 in the base-register slot is read
 * as the value zero rather than the register contents (Power ISA RA=0
 * rule), so the effective address is just the second register.
 */
286 .Lvmx_copy:
287         mflr    r0
288         std     r0,16(r1)
289         stdu    r1,-STACKFRAMESIZE(r1)
290         bl      .enter_vmx_usercopy
291         cmpwi   cr1,r3,0        /* remember in cr1 whether the call returned 0 */
        /* Reload LR and the original arguments saved at entry (r1 has moved by STACKFRAMESIZE). */
292         ld      r0,STACKFRAMESIZE+16(r1)
293         ld      r3,STACKFRAMESIZE+48(r1)
294         ld      r4,STACKFRAMESIZE+56(r1)
295         ld      r5,STACKFRAMESIZE+64(r1)
296         mtlr    r0
297
298         /*
299          * We prefetch both the source and destination using enhanced touch
300          * instructions. We use a stream ID of 0 for the load side and
301          * 1 for the store side.
302          */
303         clrrdi  r6,r4,7         /* source rounded down to a 128-byte boundary */
304         clrrdi  r9,r3,7         /* destination rounded down to a 128-byte boundary */
305         ori     r9,r9,1         /* stream=1 */
306
307         srdi    r7,r5,7         /* length in cachelines, capped at 0x3FF */
308         cmpldi  r7,0x3FF
309         ble     1f
310         li      r7,0x3FF
311 1:      lis     r0,0x0E00       /* depth=7 */
312         sldi    r7,r7,7
313         or      r7,r7,r0
314         ori     r10,r7,1        /* stream=1 */
315
316         lis     r8,0x8000       /* GO=1 */
317         clrldi  r8,r8,32        /* keep only the low 32 bits of r8 */
318
319 .machine push
320 .machine "power4"
321         dcbt    r0,r6,0b01000
322         dcbt    r0,r7,0b01010
323         dcbtst  r0,r9,0b01000
324         dcbtst  r0,r10,0b01010
325         eieio
326         dcbt    r0,r8,0b01010   /* GO */
327 .machine pop
328
        /* cr1 still holds the compare of the .enter_vmx_usercopy result with 0. */
329         beq     cr1,.Lunwind_stack_nonvmx_copy
330
331         /*
332          * If source and destination are not relatively aligned we use a
333          * slower permute loop.
334          */
335         xor     r6,r4,r3
336         rldicl. r6,r6,0,(64-4)  /* low 4 bits of source xor destination, sets cr0 */
337         bne     .Lvmx_unaligned_copy
338
339         /* Get the destination 16B aligned */
340         neg     r6,r3
341         mtocrf  0x01,r6         /* cr7 = low 4 bits of -destination */
342         clrldi  r6,r6,(64-4)    /* r6 = bytes needed to reach 16B alignment */
343
344         bf      cr7*4+3,1f      /* bit worth 1 */
345 err3;   lbz     r0,0(r4)
346         addi    r4,r4,1
347 err3;   stb     r0,0(r3)
348         addi    r3,r3,1
349
350 1:      bf      cr7*4+2,2f      /* bit worth 2 */
351 err3;   lhz     r0,0(r4)
352         addi    r4,r4,2
353 err3;   sth     r0,0(r3)
354         addi    r3,r3,2
355
356 2:      bf      cr7*4+1,3f      /* bit worth 4 */
357 err3;   lwz     r0,0(r4)
358         addi    r4,r4,4
359 err3;   stw     r0,0(r3)
360         addi    r3,r3,4
361
362 3:      bf      cr7*4+0,4f      /* bit worth 8 */
363 err3;   ld      r0,0(r4)
364         addi    r4,r4,8
365 err3;   std     r0,0(r3)
366         addi    r3,r3,8
367
368 4:      sub     r5,r5,r6        /* r5 = bytes left after the 16B alignment copies */
369
370         /* Get the destination 128B aligned */
371         neg     r6,r3
372         srdi    r7,r6,4
373         mtocrf  0x01,r7         /* cr7 = bits 4..7 of -destination: the 16, 32 and 64 byte steps */
374         clrldi  r6,r6,(64-7)    /* r6 = bytes needed to reach 128B alignment */
375
        /* Index registers for the second to fourth quadword of a group. */
376         li      r9,16
377         li      r10,32
378         li      r11,48
379
380         bf      cr7*4+3,5f      /* 16-byte step */
381 err3;   lvx     vr1,r0,r4
382         addi    r4,r4,16
383 err3;   stvx    vr1,r0,r3
384         addi    r3,r3,16
385
386 5:      bf      cr7*4+2,6f      /* 32-byte step */
387 err3;   lvx     vr1,r0,r4
388 err3;   lvx     vr0,r4,r9
389         addi    r4,r4,32
390 err3;   stvx    vr1,r0,r3
391 err3;   stvx    vr0,r3,r9
392         addi    r3,r3,32
393
394 6:      bf      cr7*4+1,7f      /* 64-byte step */
395 err3;   lvx     vr3,r0,r4
396 err3;   lvx     vr2,r4,r9
397 err3;   lvx     vr1,r4,r10
398 err3;   lvx     vr0,r4,r11
399         addi    r4,r4,64
400 err3;   stvx    vr3,r0,r3
401 err3;   stvx    vr2,r3,r9
402 err3;   stvx    vr1,r3,r10
403 err3;   stvx    vr0,r3,r11
404         addi    r3,r3,64
405
406 7:      sub     r5,r5,r6        /* r5 = bytes left after the 128B alignment copies */
407         srdi    r6,r5,7         /* number of whole 128-byte blocks */
408
        /* r14-r16 become index registers below, so save them in the frame first. */
409         std     r14,STK_REG(R14)(r1)
410         std     r15,STK_REG(R15)(r1)
411         std     r16,STK_REG(R16)(r1)
412
413         li      r12,64
414         li      r14,80
415         li      r15,96
416         li      r16,112
417
418         mtctr   r6
419
420         /*
421          * Now do cacheline sized loads and stores. By this stage the
422          * cacheline stores are also cacheline aligned.
423          */
424         .align  5
425 8:
426 err4;   lvx     vr7,r0,r4
427 err4;   lvx     vr6,r4,r9
428 err4;   lvx     vr5,r4,r10
429 err4;   lvx     vr4,r4,r11
430 err4;   lvx     vr3,r4,r12
431 err4;   lvx     vr2,r4,r14
432 err4;   lvx     vr1,r4,r15
433 err4;   lvx     vr0,r4,r16
434         addi    r4,r4,128
435 err4;   stvx    vr7,r0,r3
436 err4;   stvx    vr6,r3,r9
437 err4;   stvx    vr5,r3,r10
438 err4;   stvx    vr4,r3,r11
439 err4;   stvx    vr3,r3,r12
440 err4;   stvx    vr2,r3,r14
441 err4;   stvx    vr1,r3,r15
442 err4;   stvx    vr0,r3,r16
443         addi    r3,r3,128
444         bdnz    8b
445
446         ld      r14,STK_REG(R14)(r1)
447         ld      r15,STK_REG(R15)(r1)
448         ld      r16,STK_REG(R16)(r1)
449
450         /* Up to 127B to go */
451         clrldi  r5,r5,(64-7)
452         srdi    r6,r5,4
453         mtocrf  0x01,r6         /* cr7 bits select the 64, 32 and 16 byte steps */
454
455         bf      cr7*4+1,9f      /* 64-byte step */
456 err3;   lvx     vr3,r0,r4
457 err3;   lvx     vr2,r4,r9
458 err3;   lvx     vr1,r4,r10
459 err3;   lvx     vr0,r4,r11
460         addi    r4,r4,64
461 err3;   stvx    vr3,r0,r3
462 err3;   stvx    vr2,r3,r9
463 err3;   stvx    vr1,r3,r10
464 err3;   stvx    vr0,r3,r11
465         addi    r3,r3,64
466
467 9:      bf      cr7*4+2,10f     /* 32-byte step */
468 err3;   lvx     vr1,r0,r4
469 err3;   lvx     vr0,r4,r9
470         addi    r4,r4,32
471 err3;   stvx    vr1,r0,r3
472 err3;   stvx    vr0,r3,r9
473         addi    r3,r3,32
474
475 10:     bf      cr7*4+3,11f     /* 16-byte step */
476 err3;   lvx     vr1,r0,r4
477         addi    r4,r4,16
478 err3;   stvx    vr1,r0,r3
479         addi    r3,r3,16
480
481         /* Up to 15B to go */
482 11:     clrldi  r5,r5,(64-4)
483         mtocrf  0x01,r5         /* cr7 bits select the 8, 4, 2 and 1 byte steps */
484         bf      cr7*4+0,12f
485 err3;   ld      r0,0(r4)
486         addi    r4,r4,8
487 err3;   std     r0,0(r3)
488         addi    r3,r3,8
489
490 12:     bf      cr7*4+1,13f
491 err3;   lwz     r0,0(r4)
492         addi    r4,r4,4
493 err3;   stw     r0,0(r3)
494         addi    r3,r3,4
495
496 13:     bf      cr7*4+2,14f
497 err3;   lhz     r0,0(r4)
498         addi    r4,r4,2
499 err3;   sth     r0,0(r3)
500         addi    r3,r3,2
501
502 14:     bf      cr7*4+3,15f
503 err3;   lbz     r0,0(r4)
504 err3;   stb     r0,0(r3)
505
        /* Pop the frame and branch (not call): LR was already restored above, so the callee returns to our caller. */
506 15:     addi    r1,r1,STACKFRAMESIZE
507         b       .exit_vmx_usercopy      /* tail call optimise */
508
/*
 * .Lvmx_unaligned_copy: VMX copy used when source and destination differ
 * in their low 4 address bits.  Entered from .Lvmx_copy with the stack
 * frame already pushed and r3, r4, r5 holding the original arguments.
 *
 * The destination is aligned first (16 bytes, then 128 bytes).  Source
 * data is then read with lvx one quadword ahead: vr16 holds the permute
 * control produced by lvsl from the source address, vr0 always carries
 * the most recently loaded quadword, and each stored quadword is built
 * with vperm from the previous and the current load.  Because of the
 * read-ahead r4 runs 16 bytes in front of the data consumed, which is
 * undone at label 11 before the final sub-16-byte copies.
 *
 * Accesses are tagged err3 outside the main loop and err4 inside it,
 * where r14-r16 are in use as index registers.
 *
 * In lvx and stvx an r0 in the base-register slot is read as the value
 * zero rather than the register contents (Power ISA RA=0 rule).
 */
509 .Lvmx_unaligned_copy:
510         /* Get the destination 16B aligned */
511         neg     r6,r3
512         mtocrf  0x01,r6         /* cr7 = low 4 bits of -destination */
513         clrldi  r6,r6,(64-4)    /* r6 = bytes needed to reach 16B alignment */
514
515         bf      cr7*4+3,1f      /* bit worth 1 */
516 err3;   lbz     r0,0(r4)
517         addi    r4,r4,1
518 err3;   stb     r0,0(r3)
519         addi    r3,r3,1
520
521 1:      bf      cr7*4+2,2f      /* bit worth 2 */
522 err3;   lhz     r0,0(r4)
523         addi    r4,r4,2
524 err3;   sth     r0,0(r3)
525         addi    r3,r3,2
526
527 2:      bf      cr7*4+1,3f      /* bit worth 4 */
528 err3;   lwz     r0,0(r4)
529         addi    r4,r4,4
530 err3;   stw     r0,0(r3)
531         addi    r3,r3,4
532
533 3:      bf      cr7*4+0,4f      /* bit worth 8 */
534 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
535 err3;   lwz     r7,4(r4)
536         addi    r4,r4,8
537 err3;   stw     r0,0(r3)
538 err3;   stw     r7,4(r3)
539         addi    r3,r3,8
540
541 4:      sub     r5,r5,r6        /* r5 = bytes left after the 16B alignment copies */
542
543         /* Get the destination 128B aligned */
544         neg     r6,r3
545         srdi    r7,r6,4
546         mtocrf  0x01,r7         /* cr7 = bits 4..7 of -destination: the 16, 32 and 64 byte steps */
547         clrldi  r6,r6,(64-7)    /* r6 = bytes needed to reach 128B alignment */
548
        /* Index registers for the second to fourth quadword of a group. */
549         li      r9,16
550         li      r10,32
551         li      r11,48
552
553         lvsl    vr16,0,r4       /* Setup permute control vector */
        /* Prime vr0 with the first source quadword. From here on r4 is 16 bytes ahead. */
554 err3;   lvx     vr0,0,r4
555         addi    r4,r4,16
556
557         bf      cr7*4+3,5f      /* 16-byte step */
558 err3;   lvx     vr1,r0,r4
559         vperm   vr8,vr0,vr1,vr16
560         addi    r4,r4,16
561 err3;   stvx    vr8,r0,r3
562         addi    r3,r3,16
563         vor     vr0,vr1,vr1     /* vr0 = vr1: carry the newest quadword forward */
564
        /* In the wider steps the last load targets vr0, so no extra move is needed. */
565 5:      bf      cr7*4+2,6f      /* 32-byte step */
566 err3;   lvx     vr1,r0,r4
567         vperm   vr8,vr0,vr1,vr16
568 err3;   lvx     vr0,r4,r9
569         vperm   vr9,vr1,vr0,vr16
570         addi    r4,r4,32
571 err3;   stvx    vr8,r0,r3
572 err3;   stvx    vr9,r3,r9
573         addi    r3,r3,32
574
575 6:      bf      cr7*4+1,7f      /* 64-byte step */
576 err3;   lvx     vr3,r0,r4
577         vperm   vr8,vr0,vr3,vr16
578 err3;   lvx     vr2,r4,r9
579         vperm   vr9,vr3,vr2,vr16
580 err3;   lvx     vr1,r4,r10
581         vperm   vr10,vr2,vr1,vr16
582 err3;   lvx     vr0,r4,r11
583         vperm   vr11,vr1,vr0,vr16
584         addi    r4,r4,64
585 err3;   stvx    vr8,r0,r3
586 err3;   stvx    vr9,r3,r9
587 err3;   stvx    vr10,r3,r10
588 err3;   stvx    vr11,r3,r11
589         addi    r3,r3,64
590
591 7:      sub     r5,r5,r6        /* r5 = bytes left after the 128B alignment copies */
592         srdi    r6,r5,7         /* number of whole 128-byte blocks */
593
        /* r14-r16 become index registers below, so save them in the frame first. */
594         std     r14,STK_REG(R14)(r1)
595         std     r15,STK_REG(R15)(r1)
596         std     r16,STK_REG(R16)(r1)
597
598         li      r12,64
599         li      r14,80
600         li      r15,96
601         li      r16,112
602
603         mtctr   r6
604
605         /*
606          * Now do cacheline sized loads and stores. By this stage the
607          * cacheline stores are also cacheline aligned.
608          */
609         .align  5
610 8:
611 err4;   lvx     vr7,r0,r4
612         vperm   vr8,vr0,vr7,vr16
613 err4;   lvx     vr6,r4,r9
614         vperm   vr9,vr7,vr6,vr16
615 err4;   lvx     vr5,r4,r10
616         vperm   vr10,vr6,vr5,vr16
617 err4;   lvx     vr4,r4,r11
618         vperm   vr11,vr5,vr4,vr16
619 err4;   lvx     vr3,r4,r12
620         vperm   vr12,vr4,vr3,vr16
621 err4;   lvx     vr2,r4,r14
622         vperm   vr13,vr3,vr2,vr16
623 err4;   lvx     vr1,r4,r15
624         vperm   vr14,vr2,vr1,vr16
625 err4;   lvx     vr0,r4,r16
626         vperm   vr15,vr1,vr0,vr16
627         addi    r4,r4,128
628 err4;   stvx    vr8,r0,r3
629 err4;   stvx    vr9,r3,r9
630 err4;   stvx    vr10,r3,r10
631 err4;   stvx    vr11,r3,r11
632 err4;   stvx    vr12,r3,r12
633 err4;   stvx    vr13,r3,r14
634 err4;   stvx    vr14,r3,r15
635 err4;   stvx    vr15,r3,r16
636         addi    r3,r3,128
637         bdnz    8b
638
639         ld      r14,STK_REG(R14)(r1)
640         ld      r15,STK_REG(R15)(r1)
641         ld      r16,STK_REG(R16)(r1)
642
643         /* Up to 127B to go */
644         clrldi  r5,r5,(64-7)
645         srdi    r6,r5,4
646         mtocrf  0x01,r6         /* cr7 bits select the 64, 32 and 16 byte steps */
647
648         bf      cr7*4+1,9f      /* 64-byte step */
649 err3;   lvx     vr3,r0,r4
650         vperm   vr8,vr0,vr3,vr16
651 err3;   lvx     vr2,r4,r9
652         vperm   vr9,vr3,vr2,vr16
653 err3;   lvx     vr1,r4,r10
654         vperm   vr10,vr2,vr1,vr16
655 err3;   lvx     vr0,r4,r11
656         vperm   vr11,vr1,vr0,vr16
657         addi    r4,r4,64
658 err3;   stvx    vr8,r0,r3
659 err3;   stvx    vr9,r3,r9
660 err3;   stvx    vr10,r3,r10
661 err3;   stvx    vr11,r3,r11
662         addi    r3,r3,64
663
664 9:      bf      cr7*4+2,10f     /* 32-byte step */
665 err3;   lvx     vr1,r0,r4
666         vperm   vr8,vr0,vr1,vr16
667 err3;   lvx     vr0,r4,r9
668         vperm   vr9,vr1,vr0,vr16
669         addi    r4,r4,32
670 err3;   stvx    vr8,r0,r3
671 err3;   stvx    vr9,r3,r9
672         addi    r3,r3,32
673
674 10:     bf      cr7*4+3,11f     /* 16-byte step */
675 err3;   lvx     vr1,r0,r4
676         vperm   vr8,vr0,vr1,vr16
677         addi    r4,r4,16
678 err3;   stvx    vr8,r0,r3
679         addi    r3,r3,16
680
681         /* Up to 15B to go */
682 11:     clrldi  r5,r5,(64-4)
683         addi    r4,r4,-16       /* Unwind the +16 load offset */
684         mtocrf  0x01,r5         /* cr7 bits select the 8, 4, 2 and 1 byte steps */
685         bf      cr7*4+0,12f
686 err3;   lwz     r0,0(r4)        /* Less chance of a reject with word ops */
687 err3;   lwz     r6,4(r4)
688         addi    r4,r4,8
689 err3;   stw     r0,0(r3)
690 err3;   stw     r6,4(r3)
691         addi    r3,r3,8
692
693 12:     bf      cr7*4+1,13f
694 err3;   lwz     r0,0(r4)
695         addi    r4,r4,4
696 err3;   stw     r0,0(r3)
697         addi    r3,r3,4
698
699 13:     bf      cr7*4+2,14f
700 err3;   lhz     r0,0(r4)
701         addi    r4,r4,2
702 err3;   sth     r0,0(r3)
703         addi    r3,r3,2
704
705 14:     bf      cr7*4+3,15f
706 err3;   lbz     r0,0(r4)
707 err3;   stb     r0,0(r3)
708
        /* Pop the frame and branch (not call): LR was restored in .Lvmx_copy, so the callee returns to our caller. */
709 15:     addi    r1,r1,STACKFRAMESIZE
710         b       .exit_vmx_usercopy      /* tail call optimise */
711 #endif /* CONFIG_ALTIVEC */