Merge branch 'kvm-arm/vgic-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git...
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / r600_blit.c
1 /*
2  * Copyright 2009 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *     Alex Deucher <alexander.deucher@amd.com>
25  *
26  * ------------------------ This file is DEPRECATED! -------------------------
27  */
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon_drv.h"
31
32 #include "r600_blit_shaders.h"
33
/* Draw-initiator values emitted by draw_auto() */
#define DI_PT_RECTLIST        0x11	/* written to VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT  0x0	/* payload of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX 0x2	/* last dword of DRAW_INDEX_AUTO */

/* Source formats handed to set_tex_resource() */
#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a

/* Destination formats handed to set_render_target() */
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
44
45 static void
46 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
47 {
48         u32 cb_color_info;
49         int pitch, slice;
50         RING_LOCALS;
51         DRM_DEBUG("\n");
52
53         h = ALIGN(h, 8);
54         if (h < 8)
55                 h = 8;
56
57         cb_color_info = ((format << 2) | (1 << 27));
58         pitch = (w / 8) - 1;
59         slice = ((w * h) / 64) - 1;
60
61         if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
62             ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
63                 BEGIN_RING(21 + 2);
64                 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
65                 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
66                 OUT_RING(gpu_addr >> 8);
67                 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
68                 OUT_RING(2 << 0);
69         } else {
70                 BEGIN_RING(21);
71                 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
72                 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
73                 OUT_RING(gpu_addr >> 8);
74         }
75
76         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
77         OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
78         OUT_RING((pitch << 0) | (slice << 10));
79
80         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
81         OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
82         OUT_RING(0);
83
84         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
85         OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
86         OUT_RING(cb_color_info);
87
88         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
89         OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
90         OUT_RING(0);
91
92         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
93         OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
94         OUT_RING(0);
95
96         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
97         OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
98         OUT_RING(0);
99
100         ADVANCE_RING();
101 }
102
103 static void
104 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
105                     u32 sync_type, u32 size, u64 mc_addr)
106 {
107         u32 cp_coher_size;
108         RING_LOCALS;
109         DRM_DEBUG("\n");
110
111         if (size == 0xffffffff)
112                 cp_coher_size = 0xffffffff;
113         else
114                 cp_coher_size = ((size + 255) >> 8);
115
116         BEGIN_RING(5);
117         OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
118         OUT_RING(sync_type);
119         OUT_RING(cp_coher_size);
120         OUT_RING((mc_addr >> 8));
121         OUT_RING(10); /* poll interval */
122         ADVANCE_RING();
123 }
124
125 static void
126 set_shaders(struct drm_device *dev)
127 {
128         drm_radeon_private_t *dev_priv = dev->dev_private;
129         u64 gpu_addr;
130         int i;
131         u32 *vs, *ps;
132         uint32_t sq_pgm_resources;
133         RING_LOCALS;
134         DRM_DEBUG("\n");
135
136         /* load shaders */
137         vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
138         ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
139
140         for (i = 0; i < r6xx_vs_size; i++)
141                 vs[i] = cpu_to_le32(r6xx_vs[i]);
142         for (i = 0; i < r6xx_ps_size; i++)
143                 ps[i] = cpu_to_le32(r6xx_ps[i]);
144
145         dev_priv->blit_vb->used = 512;
146
147         gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
148
149         /* setup shader regs */
150         sq_pgm_resources = (1 << 0);
151
152         BEGIN_RING(9 + 12);
153         /* VS */
154         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
155         OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
156         OUT_RING(gpu_addr >> 8);
157
158         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
159         OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
160         OUT_RING(sq_pgm_resources);
161
162         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
163         OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
164         OUT_RING(0);
165
166         /* PS */
167         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
168         OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
169         OUT_RING((gpu_addr + 256) >> 8);
170
171         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
172         OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
173         OUT_RING(sq_pgm_resources | (1 << 28));
174
175         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
176         OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
177         OUT_RING(2);
178
179         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
180         OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
181         OUT_RING(0);
182         ADVANCE_RING();
183
184         cp_set_surface_sync(dev_priv,
185                             R600_SH_ACTION_ENA, 512, gpu_addr);
186 }
187
188 static void
189 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
190 {
191         uint32_t sq_vtx_constant_word2;
192         RING_LOCALS;
193         DRM_DEBUG("\n");
194
195         sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
196 #ifdef __BIG_ENDIAN
197         sq_vtx_constant_word2 |= (2 << 30);
198 #endif
199
200         BEGIN_RING(9);
201         OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
202         OUT_RING(0x460);
203         OUT_RING(gpu_addr & 0xffffffff);
204         OUT_RING(48 - 1);
205         OUT_RING(sq_vtx_constant_word2);
206         OUT_RING(1 << 0);
207         OUT_RING(0);
208         OUT_RING(0);
209         OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
210         ADVANCE_RING();
211
212         if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
213             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
214             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
215             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
216             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
217                 cp_set_surface_sync(dev_priv,
218                                     R600_TC_ACTION_ENA, 48, gpu_addr);
219         else
220                 cp_set_surface_sync(dev_priv,
221                                     R600_VC_ACTION_ENA, 48, gpu_addr);
222 }
223
224 static void
225 set_tex_resource(drm_radeon_private_t *dev_priv,
226                  int format, int w, int h, int pitch, u64 gpu_addr)
227 {
228         uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
229         RING_LOCALS;
230         DRM_DEBUG("\n");
231
232         if (h < 1)
233                 h = 1;
234
235         sq_tex_resource_word0 = (1 << 0);
236         sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
237                                   ((w - 1) << 19));
238
239         sq_tex_resource_word1 = (format << 26);
240         sq_tex_resource_word1 |= ((h - 1) << 0);
241
242         sq_tex_resource_word4 = ((1 << 14) |
243                                  (0 << 16) |
244                                  (1 << 19) |
245                                  (2 << 22) |
246                                  (3 << 25));
247
248         BEGIN_RING(9);
249         OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
250         OUT_RING(0);
251         OUT_RING(sq_tex_resource_word0);
252         OUT_RING(sq_tex_resource_word1);
253         OUT_RING(gpu_addr >> 8);
254         OUT_RING(gpu_addr >> 8);
255         OUT_RING(sq_tex_resource_word4);
256         OUT_RING(0);
257         OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
258         ADVANCE_RING();
259
260 }
261
262 static void
263 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
264 {
265         RING_LOCALS;
266         DRM_DEBUG("\n");
267
268         BEGIN_RING(12);
269         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
270         OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
271         OUT_RING((x1 << 0) | (y1 << 16));
272         OUT_RING((x2 << 0) | (y2 << 16));
273
274         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
275         OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
276         OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
277         OUT_RING((x2 << 0) | (y2 << 16));
278
279         OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
280         OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
281         OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
282         OUT_RING((x2 << 0) | (y2 << 16));
283         ADVANCE_RING();
284 }
285
286 static void
287 draw_auto(drm_radeon_private_t *dev_priv)
288 {
289         RING_LOCALS;
290         DRM_DEBUG("\n");
291
292         BEGIN_RING(10);
293         OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
294         OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
295         OUT_RING(DI_PT_RECTLIST);
296
297         OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
298 #ifdef __BIG_ENDIAN
299         OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
300 #else
301         OUT_RING(DI_INDEX_SIZE_16_BIT);
302 #endif
303
304         OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
305         OUT_RING(1);
306
307         OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
308         OUT_RING(3);
309         OUT_RING(DI_SRC_SEL_AUTO_INDEX);
310
311         ADVANCE_RING();
312         COMMIT_RING();
313 }
314
315 static void
316 set_default_state(drm_radeon_private_t *dev_priv)
317 {
318         int i;
319         u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
320         u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
321         int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
322         int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
323         int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
324         RING_LOCALS;
325
326         switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
327         case CHIP_R600:
328                 num_ps_gprs = 192;
329                 num_vs_gprs = 56;
330                 num_temp_gprs = 4;
331                 num_gs_gprs = 0;
332                 num_es_gprs = 0;
333                 num_ps_threads = 136;
334                 num_vs_threads = 48;
335                 num_gs_threads = 4;
336                 num_es_threads = 4;
337                 num_ps_stack_entries = 128;
338                 num_vs_stack_entries = 128;
339                 num_gs_stack_entries = 0;
340                 num_es_stack_entries = 0;
341                 break;
342         case CHIP_RV630:
343         case CHIP_RV635:
344                 num_ps_gprs = 84;
345                 num_vs_gprs = 36;
346                 num_temp_gprs = 4;
347                 num_gs_gprs = 0;
348                 num_es_gprs = 0;
349                 num_ps_threads = 144;
350                 num_vs_threads = 40;
351                 num_gs_threads = 4;
352                 num_es_threads = 4;
353                 num_ps_stack_entries = 40;
354                 num_vs_stack_entries = 40;
355                 num_gs_stack_entries = 32;
356                 num_es_stack_entries = 16;
357                 break;
358         case CHIP_RV610:
359         case CHIP_RV620:
360         case CHIP_RS780:
361         case CHIP_RS880:
362         default:
363                 num_ps_gprs = 84;
364                 num_vs_gprs = 36;
365                 num_temp_gprs = 4;
366                 num_gs_gprs = 0;
367                 num_es_gprs = 0;
368                 num_ps_threads = 136;
369                 num_vs_threads = 48;
370                 num_gs_threads = 4;
371                 num_es_threads = 4;
372                 num_ps_stack_entries = 40;
373                 num_vs_stack_entries = 40;
374                 num_gs_stack_entries = 32;
375                 num_es_stack_entries = 16;
376                 break;
377         case CHIP_RV670:
378                 num_ps_gprs = 144;
379                 num_vs_gprs = 40;
380                 num_temp_gprs = 4;
381                 num_gs_gprs = 0;
382                 num_es_gprs = 0;
383                 num_ps_threads = 136;
384                 num_vs_threads = 48;
385                 num_gs_threads = 4;
386                 num_es_threads = 4;
387                 num_ps_stack_entries = 40;
388                 num_vs_stack_entries = 40;
389                 num_gs_stack_entries = 32;
390                 num_es_stack_entries = 16;
391                 break;
392         case CHIP_RV770:
393                 num_ps_gprs = 192;
394                 num_vs_gprs = 56;
395                 num_temp_gprs = 4;
396                 num_gs_gprs = 0;
397                 num_es_gprs = 0;
398                 num_ps_threads = 188;
399                 num_vs_threads = 60;
400                 num_gs_threads = 0;
401                 num_es_threads = 0;
402                 num_ps_stack_entries = 256;
403                 num_vs_stack_entries = 256;
404                 num_gs_stack_entries = 0;
405                 num_es_stack_entries = 0;
406                 break;
407         case CHIP_RV730:
408         case CHIP_RV740:
409                 num_ps_gprs = 84;
410                 num_vs_gprs = 36;
411                 num_temp_gprs = 4;
412                 num_gs_gprs = 0;
413                 num_es_gprs = 0;
414                 num_ps_threads = 188;
415                 num_vs_threads = 60;
416                 num_gs_threads = 0;
417                 num_es_threads = 0;
418                 num_ps_stack_entries = 128;
419                 num_vs_stack_entries = 128;
420                 num_gs_stack_entries = 0;
421                 num_es_stack_entries = 0;
422                 break;
423         case CHIP_RV710:
424                 num_ps_gprs = 192;
425                 num_vs_gprs = 56;
426                 num_temp_gprs = 4;
427                 num_gs_gprs = 0;
428                 num_es_gprs = 0;
429                 num_ps_threads = 144;
430                 num_vs_threads = 48;
431                 num_gs_threads = 0;
432                 num_es_threads = 0;
433                 num_ps_stack_entries = 128;
434                 num_vs_stack_entries = 128;
435                 num_gs_stack_entries = 0;
436                 num_es_stack_entries = 0;
437                 break;
438         }
439
440         if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
441             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
442             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
443             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
444             ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
445                 sq_config = 0;
446         else
447                 sq_config = R600_VC_ENABLE;
448
449         sq_config |= (R600_DX9_CONSTS |
450                       R600_ALU_INST_PREFER_VECTOR |
451                       R600_PS_PRIO(0) |
452                       R600_VS_PRIO(1) |
453                       R600_GS_PRIO(2) |
454                       R600_ES_PRIO(3));
455
456         sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
457                                   R600_NUM_VS_GPRS(num_vs_gprs) |
458                                   R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
459         sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
460                                   R600_NUM_ES_GPRS(num_es_gprs));
461         sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
462                                    R600_NUM_VS_THREADS(num_vs_threads) |
463                                    R600_NUM_GS_THREADS(num_gs_threads) |
464                                    R600_NUM_ES_THREADS(num_es_threads));
465         sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
466                                     R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
467         sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
468                                     R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
469
470         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
471                 BEGIN_RING(r7xx_default_size + 10);
472                 for (i = 0; i < r7xx_default_size; i++)
473                         OUT_RING(r7xx_default_state[i]);
474         } else {
475                 BEGIN_RING(r6xx_default_size + 10);
476                 for (i = 0; i < r6xx_default_size; i++)
477                         OUT_RING(r6xx_default_state[i]);
478         }
479         OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
480         OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
481         /* SQ config */
482         OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
483         OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
484         OUT_RING(sq_config);
485         OUT_RING(sq_gpr_resource_mgmt_1);
486         OUT_RING(sq_gpr_resource_mgmt_2);
487         OUT_RING(sq_thread_resource_mgmt);
488         OUT_RING(sq_stack_resource_mgmt_1);
489         OUT_RING(sq_stack_resource_mgmt_2);
490         ADVANCE_RING();
491 }
492
493 static int r600_nomm_get_vb(struct drm_device *dev)
494 {
495         drm_radeon_private_t *dev_priv = dev->dev_private;
496         dev_priv->blit_vb = radeon_freelist_get(dev);
497         if (!dev_priv->blit_vb) {
498                 DRM_ERROR("Unable to allocate vertex buffer for blit\n");
499                 return -EAGAIN;
500         }
501         return 0;
502 }
503
504 static void r600_nomm_put_vb(struct drm_device *dev)
505 {
506         drm_radeon_private_t *dev_priv = dev->dev_private;
507
508         dev_priv->blit_vb->used = 0;
509         radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
510 }
511
512 static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
513 {
514         drm_radeon_private_t *dev_priv = dev->dev_private;
515         return (((char *)dev->agp_buffer_map->handle +
516                  dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
517 }
518
519 int
520 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
521 {
522         drm_radeon_private_t *dev_priv = dev->dev_private;
523         int ret;
524         DRM_DEBUG("\n");
525
526         ret = r600_nomm_get_vb(dev);
527         if (ret)
528                 return ret;
529
530         dev_priv->blit_vb->file_priv = file_priv;
531
532         set_default_state(dev_priv);
533         set_shaders(dev);
534
535         return 0;
536 }
537
538
539 void
540 r600_done_blit_copy(struct drm_device *dev)
541 {
542         drm_radeon_private_t *dev_priv = dev->dev_private;
543         RING_LOCALS;
544         DRM_DEBUG("\n");
545
546         BEGIN_RING(5);
547         OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
548         OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
549         /* wait for 3D idle clean */
550         OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
551         OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
552         OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
553
554         ADVANCE_RING();
555         COMMIT_RING();
556
557         r600_nomm_put_vb(dev);
558 }
559
560 void
561 r600_blit_copy(struct drm_device *dev,
562                uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
563                int size_bytes)
564 {
565         drm_radeon_private_t *dev_priv = dev->dev_private;
566         int max_bytes;
567         u64 vb_addr;
568         u32 *vb;
569
570         vb = r600_nomm_get_vb_ptr(dev);
571
572         if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
573                 max_bytes = 8192;
574
575                 while (size_bytes) {
576                         int cur_size = size_bytes;
577                         int src_x = src_gpu_addr & 255;
578                         int dst_x = dst_gpu_addr & 255;
579                         int h = 1;
580                         src_gpu_addr = src_gpu_addr & ~255;
581                         dst_gpu_addr = dst_gpu_addr & ~255;
582
583                         if (!src_x && !dst_x) {
584                                 h = (cur_size / max_bytes);
585                                 if (h > 8192)
586                                         h = 8192;
587                                 if (h == 0)
588                                         h = 1;
589                                 else
590                                         cur_size = max_bytes;
591                         } else {
592                                 if (cur_size > max_bytes)
593                                         cur_size = max_bytes;
594                                 if (cur_size > (max_bytes - dst_x))
595                                         cur_size = (max_bytes - dst_x);
596                                 if (cur_size > (max_bytes - src_x))
597                                         cur_size = (max_bytes - src_x);
598                         }
599
600                         if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
601
602                                 r600_nomm_put_vb(dev);
603                                 r600_nomm_get_vb(dev);
604                                 if (!dev_priv->blit_vb)
605                                         return;
606                                 set_shaders(dev);
607                                 vb = r600_nomm_get_vb_ptr(dev);
608                         }
609
610                         vb[0] = int2float(dst_x);
611                         vb[1] = 0;
612                         vb[2] = int2float(src_x);
613                         vb[3] = 0;
614
615                         vb[4] = int2float(dst_x);
616                         vb[5] = int2float(h);
617                         vb[6] = int2float(src_x);
618                         vb[7] = int2float(h);
619
620                         vb[8] = int2float(dst_x + cur_size);
621                         vb[9] = int2float(h);
622                         vb[10] = int2float(src_x + cur_size);
623                         vb[11] = int2float(h);
624
625                         /* src */
626                         set_tex_resource(dev_priv, FMT_8,
627                                          src_x + cur_size, h, src_x + cur_size,
628                                          src_gpu_addr);
629
630                         cp_set_surface_sync(dev_priv,
631                                             R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
632
633                         /* dst */
634                         set_render_target(dev_priv, COLOR_8,
635                                           dst_x + cur_size, h,
636                                           dst_gpu_addr);
637
638                         /* scissors */
639                         set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
640
641                         /* Vertex buffer setup */
642                         vb_addr = dev_priv->gart_buffers_offset +
643                                 dev_priv->blit_vb->offset +
644                                 dev_priv->blit_vb->used;
645                         set_vtx_resource(dev_priv, vb_addr);
646
647                         /* draw */
648                         draw_auto(dev_priv);
649
650                         cp_set_surface_sync(dev_priv,
651                                             R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
652                                             cur_size * h, dst_gpu_addr);
653
654                         vb += 12;
655                         dev_priv->blit_vb->used += 12 * 4;
656
657                         src_gpu_addr += cur_size * h;
658                         dst_gpu_addr += cur_size * h;
659                         size_bytes -= cur_size * h;
660                 }
661         } else {
662                 max_bytes = 8192 * 4;
663
664                 while (size_bytes) {
665                         int cur_size = size_bytes;
666                         int src_x = (src_gpu_addr & 255);
667                         int dst_x = (dst_gpu_addr & 255);
668                         int h = 1;
669                         src_gpu_addr = src_gpu_addr & ~255;
670                         dst_gpu_addr = dst_gpu_addr & ~255;
671
672                         if (!src_x && !dst_x) {
673                                 h = (cur_size / max_bytes);
674                                 if (h > 8192)
675                                         h = 8192;
676                                 if (h == 0)
677                                         h = 1;
678                                 else
679                                         cur_size = max_bytes;
680                         } else {
681                                 if (cur_size > max_bytes)
682                                         cur_size = max_bytes;
683                                 if (cur_size > (max_bytes - dst_x))
684                                         cur_size = (max_bytes - dst_x);
685                                 if (cur_size > (max_bytes - src_x))
686                                         cur_size = (max_bytes - src_x);
687                         }
688
689                         if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
690                                 r600_nomm_put_vb(dev);
691                                 r600_nomm_get_vb(dev);
692                                 if (!dev_priv->blit_vb)
693                                         return;
694
695                                 set_shaders(dev);
696                                 vb = r600_nomm_get_vb_ptr(dev);
697                         }
698
699                         vb[0] = int2float(dst_x / 4);
700                         vb[1] = 0;
701                         vb[2] = int2float(src_x / 4);
702                         vb[3] = 0;
703
704                         vb[4] = int2float(dst_x / 4);
705                         vb[5] = int2float(h);
706                         vb[6] = int2float(src_x / 4);
707                         vb[7] = int2float(h);
708
709                         vb[8] = int2float((dst_x + cur_size) / 4);
710                         vb[9] = int2float(h);
711                         vb[10] = int2float((src_x + cur_size) / 4);
712                         vb[11] = int2float(h);
713
714                         /* src */
715                         set_tex_resource(dev_priv, FMT_8_8_8_8,
716                                          (src_x + cur_size) / 4,
717                                          h, (src_x + cur_size) / 4,
718                                          src_gpu_addr);
719
720                         cp_set_surface_sync(dev_priv,
721                                             R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
722
723                         /* dst */
724                         set_render_target(dev_priv, COLOR_8_8_8_8,
725                                           (dst_x + cur_size) / 4, h,
726                                           dst_gpu_addr);
727
728                         /* scissors */
729                         set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
730
731                         /* Vertex buffer setup */
732                         vb_addr = dev_priv->gart_buffers_offset +
733                                 dev_priv->blit_vb->offset +
734                                 dev_priv->blit_vb->used;
735                         set_vtx_resource(dev_priv, vb_addr);
736
737                         /* draw */
738                         draw_auto(dev_priv);
739
740                         cp_set_surface_sync(dev_priv,
741                                             R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
742                                             cur_size * h, dst_gpu_addr);
743
744                         vb += 12;
745                         dev_priv->blit_vb->used += 12 * 4;
746
747                         src_gpu_addr += cur_size * h;
748                         dst_gpu_addr += cur_size * h;
749                         size_bytes -= cur_size * h;
750                 }
751         }
752 }
753
754 void
755 r600_blit_swap(struct drm_device *dev,
756                uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
757                int sx, int sy, int dx, int dy,
758                int w, int h, int src_pitch, int dst_pitch, int cpp)
759 {
760         drm_radeon_private_t *dev_priv = dev->dev_private;
761         int cb_format, tex_format;
762         int sx2, sy2, dx2, dy2;
763         u64 vb_addr;
764         u32 *vb;
765
766         if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
767
768                 r600_nomm_put_vb(dev);
769                 r600_nomm_get_vb(dev);
770                 if (!dev_priv->blit_vb)
771                         return;
772
773                 set_shaders(dev);
774         }
775         vb = r600_nomm_get_vb_ptr(dev);
776
777         sx2 = sx + w;
778         sy2 = sy + h;
779         dx2 = dx + w;
780         dy2 = dy + h;
781
782         vb[0] = int2float(dx);
783         vb[1] = int2float(dy);
784         vb[2] = int2float(sx);
785         vb[3] = int2float(sy);
786
787         vb[4] = int2float(dx);
788         vb[5] = int2float(dy2);
789         vb[6] = int2float(sx);
790         vb[7] = int2float(sy2);
791
792         vb[8] = int2float(dx2);
793         vb[9] = int2float(dy2);
794         vb[10] = int2float(sx2);
795         vb[11] = int2float(sy2);
796
797         switch(cpp) {
798         case 4:
799                 cb_format = COLOR_8_8_8_8;
800                 tex_format = FMT_8_8_8_8;
801                 break;
802         case 2:
803                 cb_format = COLOR_5_6_5;
804                 tex_format = FMT_5_6_5;
805                 break;
806         default:
807                 cb_format = COLOR_8;
808                 tex_format = FMT_8;
809                 break;
810         }
811
812         /* src */
813         set_tex_resource(dev_priv, tex_format,
814                          src_pitch / cpp,
815                          sy2, src_pitch / cpp,
816                          src_gpu_addr);
817
818         cp_set_surface_sync(dev_priv,
819                             R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
820
821         /* dst */
822         set_render_target(dev_priv, cb_format,
823                           dst_pitch / cpp, dy2,
824                           dst_gpu_addr);
825
826         /* scissors */
827         set_scissors(dev_priv, dx, dy, dx2, dy2);
828
829         /* Vertex buffer setup */
830         vb_addr = dev_priv->gart_buffers_offset +
831                 dev_priv->blit_vb->offset +
832                 dev_priv->blit_vb->used;
833         set_vtx_resource(dev_priv, vb_addr);
834
835         /* draw */
836         draw_auto(dev_priv);
837
838         cp_set_surface_sync(dev_priv,
839                             R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
840                             dst_pitch * dy2, dst_gpu_addr);
841
842         dev_priv->blit_vb->used += 12 * 4;
843 }