Merge remote-tracking branch 'lsk/v3.10/topic/gator' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / ni.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "nid.h"
33 #include "atom.h"
34 #include "ni_reg.h"
35 #include "cayman_blit_shaders.h"
36
37 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
38 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
39 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
40 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
41 extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
42 extern void evergreen_mc_program(struct radeon_device *rdev);
43 extern void evergreen_irq_suspend(struct radeon_device *rdev);
44 extern int evergreen_mc_init(struct radeon_device *rdev);
45 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
46 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
47 extern void si_rlc_fini(struct radeon_device *rdev);
48 extern int si_rlc_init(struct radeon_device *rdev);
49
50 #define EVERGREEN_PFP_UCODE_SIZE 1120
51 #define EVERGREEN_PM4_UCODE_SIZE 1376
52 #define EVERGREEN_RLC_UCODE_SIZE 768
53 #define BTC_MC_UCODE_SIZE 6024
54
55 #define CAYMAN_PFP_UCODE_SIZE 2176
56 #define CAYMAN_PM4_UCODE_SIZE 2176
57 #define CAYMAN_RLC_UCODE_SIZE 1024
58 #define CAYMAN_MC_UCODE_SIZE 6037
59
60 #define ARUBA_RLC_UCODE_SIZE 1536
61
62 /* Firmware Names */
63 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
64 MODULE_FIRMWARE("radeon/BARTS_me.bin");
65 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
66 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
67 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
68 MODULE_FIRMWARE("radeon/TURKS_me.bin");
69 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
70 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
71 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
72 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
73 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
74 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
75 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
76 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
77 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
78 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
79 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
80
81
/* Golden register fixups for Cayman, second sequence.  Consumed by
 * radeon_program_register_sequence() as {offset, mask, value} triples. */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
91
/* Golden register fixups for Cayman, main sequence.  Consumed by
 * radeon_program_register_sequence() as {offset, mask, value} triples. */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
132
/* Golden register fixups shared by both ARUBA variants (applied after the
 * per-variant table in ni_init_golden_registers()).  Same {offset, mask,
 * value} triple format; 0x8f8/0x8fc look like an index/data pair —
 * TODO(review): confirm against the register spec. */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
140
/* Golden register fixups for the "DVST" subset of ARUBA (TN) device ids
 * (see the id list in ni_init_golden_registers()).  {offset, mask, value}
 * triples consumed by radeon_program_register_sequence(). */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
198
/* Golden register fixups for the remaining ARUBA (TN) device ids.
 * {offset, mask, value} triples.  Note most entries are deliberately
 * written twice in a row — presumably a required double-write;
 * TODO(review): confirm before deduplicating. */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
305
306 static void ni_init_golden_registers(struct radeon_device *rdev)
307 {
308         switch (rdev->family) {
309         case CHIP_CAYMAN:
310                 radeon_program_register_sequence(rdev,
311                                                  cayman_golden_registers,
312                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
313                 radeon_program_register_sequence(rdev,
314                                                  cayman_golden_registers2,
315                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
316                 break;
317         case CHIP_ARUBA:
318                 if ((rdev->pdev->device == 0x9900) ||
319                     (rdev->pdev->device == 0x9901) ||
320                     (rdev->pdev->device == 0x9903) ||
321                     (rdev->pdev->device == 0x9904) ||
322                     (rdev->pdev->device == 0x9905) ||
323                     (rdev->pdev->device == 0x9906) ||
324                     (rdev->pdev->device == 0x9907) ||
325                     (rdev->pdev->device == 0x9908) ||
326                     (rdev->pdev->device == 0x9909) ||
327                     (rdev->pdev->device == 0x990A) ||
328                     (rdev->pdev->device == 0x990B) ||
329                     (rdev->pdev->device == 0x990C) ||
330                     (rdev->pdev->device == 0x990D) ||
331                     (rdev->pdev->device == 0x990E) ||
332                     (rdev->pdev->device == 0x990F) ||
333                     (rdev->pdev->device == 0x9910) ||
334                     (rdev->pdev->device == 0x9913) ||
335                     (rdev->pdev->device == 0x9917) ||
336                     (rdev->pdev->device == 0x9918)) {
337                         radeon_program_register_sequence(rdev,
338                                                          dvst_golden_registers,
339                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
340                         radeon_program_register_sequence(rdev,
341                                                          dvst_golden_registers2,
342                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
343                 } else {
344                         radeon_program_register_sequence(rdev,
345                                                          scrapper_golden_registers,
346                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
347                         radeon_program_register_sequence(rdev,
348                                                          dvst_golden_registers2,
349                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
350                 }
351                 break;
352         default:
353                 break;
354         }
355 }
356
357 #define BTC_IO_MC_REGS_SIZE 29
358
/* Barts MC io register init table: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ni_mc_load_microcode(). */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
390
/* Turks MC io register init table: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ni_mc_load_microcode().
 * Identical to the Barts table except for the final (0x9f) entry. */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
422
/* Caicos MC io register init table: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ni_mc_load_microcode().
 * Identical to the Barts table except for the final (0x9f) entry. */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
454
/* Cayman MC io register init table: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ni_mc_load_microcode().
 * Identical to the Barts table except for the final (0x9f) entry. */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
486
487 int ni_mc_load_microcode(struct radeon_device *rdev)
488 {
489         const __be32 *fw_data;
490         u32 mem_type, running, blackout = 0;
491         u32 *io_mc_regs;
492         int i, ucode_size, regs_size;
493
494         if (!rdev->mc_fw)
495                 return -EINVAL;
496
497         switch (rdev->family) {
498         case CHIP_BARTS:
499                 io_mc_regs = (u32 *)&barts_io_mc_regs;
500                 ucode_size = BTC_MC_UCODE_SIZE;
501                 regs_size = BTC_IO_MC_REGS_SIZE;
502                 break;
503         case CHIP_TURKS:
504                 io_mc_regs = (u32 *)&turks_io_mc_regs;
505                 ucode_size = BTC_MC_UCODE_SIZE;
506                 regs_size = BTC_IO_MC_REGS_SIZE;
507                 break;
508         case CHIP_CAICOS:
509         default:
510                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
511                 ucode_size = BTC_MC_UCODE_SIZE;
512                 regs_size = BTC_IO_MC_REGS_SIZE;
513                 break;
514         case CHIP_CAYMAN:
515                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
516                 ucode_size = CAYMAN_MC_UCODE_SIZE;
517                 regs_size = BTC_IO_MC_REGS_SIZE;
518                 break;
519         }
520
521         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
522         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
523
524         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
525                 if (running) {
526                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
527                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
528                 }
529
530                 /* reset the engine and set to writable */
531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
533
534                 /* load mc io regs */
535                 for (i = 0; i < regs_size; i++) {
536                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
537                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
538                 }
539                 /* load the MC ucode */
540                 fw_data = (const __be32 *)rdev->mc_fw->data;
541                 for (i = 0; i < ucode_size; i++)
542                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
543
544                 /* put the engine back into the active state */
545                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
546                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
547                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
548
549                 /* wait for training to complete */
550                 for (i = 0; i < rdev->usec_timeout; i++) {
551                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
552                                 break;
553                         udelay(1);
554                 }
555
556                 if (running)
557                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
558         }
559
560         return 0;
561 }
562
563 int ni_init_microcode(struct radeon_device *rdev)
564 {
565         struct platform_device *pdev;
566         const char *chip_name;
567         const char *rlc_chip_name;
568         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
569         char fw_name[30];
570         int err;
571
572         DRM_DEBUG("\n");
573
574         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
575         err = IS_ERR(pdev);
576         if (err) {
577                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
578                 return -EINVAL;
579         }
580
581         switch (rdev->family) {
582         case CHIP_BARTS:
583                 chip_name = "BARTS";
584                 rlc_chip_name = "BTC";
585                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
586                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
587                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
588                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
589                 break;
590         case CHIP_TURKS:
591                 chip_name = "TURKS";
592                 rlc_chip_name = "BTC";
593                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
594                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
595                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
596                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
597                 break;
598         case CHIP_CAICOS:
599                 chip_name = "CAICOS";
600                 rlc_chip_name = "BTC";
601                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
602                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
603                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
604                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
605                 break;
606         case CHIP_CAYMAN:
607                 chip_name = "CAYMAN";
608                 rlc_chip_name = "CAYMAN";
609                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
610                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
611                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
612                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
613                 break;
614         case CHIP_ARUBA:
615                 chip_name = "ARUBA";
616                 rlc_chip_name = "ARUBA";
617                 /* pfp/me same size as CAYMAN */
618                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
619                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
620                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
621                 mc_req_size = 0;
622                 break;
623         default: BUG();
624         }
625
626         DRM_INFO("Loading %s Microcode\n", chip_name);
627
628         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
629         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
630         if (err)
631                 goto out;
632         if (rdev->pfp_fw->size != pfp_req_size) {
633                 printk(KERN_ERR
634                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
635                        rdev->pfp_fw->size, fw_name);
636                 err = -EINVAL;
637                 goto out;
638         }
639
640         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
641         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
642         if (err)
643                 goto out;
644         if (rdev->me_fw->size != me_req_size) {
645                 printk(KERN_ERR
646                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
647                        rdev->me_fw->size, fw_name);
648                 err = -EINVAL;
649         }
650
651         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
652         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
653         if (err)
654                 goto out;
655         if (rdev->rlc_fw->size != rlc_req_size) {
656                 printk(KERN_ERR
657                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
658                        rdev->rlc_fw->size, fw_name);
659                 err = -EINVAL;
660         }
661
662         /* no MC ucode on TN */
663         if (!(rdev->flags & RADEON_IS_IGP)) {
664                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
665                 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
666                 if (err)
667                         goto out;
668                 if (rdev->mc_fw->size != mc_req_size) {
669                         printk(KERN_ERR
670                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
671                                rdev->mc_fw->size, fw_name);
672                         err = -EINVAL;
673                 }
674         }
675 out:
676         platform_device_unregister(pdev);
677
678         if (err) {
679                 if (err != -EINVAL)
680                         printk(KERN_ERR
681                                "ni_cp: Failed to load firmware \"%s\"\n",
682                                fw_name);
683                 release_firmware(rdev->pfp_fw);
684                 rdev->pfp_fw = NULL;
685                 release_firmware(rdev->me_fw);
686                 rdev->me_fw = NULL;
687                 release_firmware(rdev->rlc_fw);
688                 rdev->rlc_fw = NULL;
689                 release_firmware(rdev->mc_fw);
690                 rdev->mc_fw = NULL;
691         }
692         return err;
693 }
694
695 /*
696  * Core functions
697  */
698 static void cayman_gpu_init(struct radeon_device *rdev)
699 {
700         u32 gb_addr_config = 0;
701         u32 mc_shared_chmap, mc_arb_ramcfg;
702         u32 cgts_tcc_disable;
703         u32 sx_debug_1;
704         u32 smx_dc_ctl0;
705         u32 cgts_sm_ctrl_reg;
706         u32 hdp_host_path_cntl;
707         u32 tmp;
708         u32 disabled_rb_mask;
709         int i, j;
710
711         switch (rdev->family) {
712         case CHIP_CAYMAN:
713                 rdev->config.cayman.max_shader_engines = 2;
714                 rdev->config.cayman.max_pipes_per_simd = 4;
715                 rdev->config.cayman.max_tile_pipes = 8;
716                 rdev->config.cayman.max_simds_per_se = 12;
717                 rdev->config.cayman.max_backends_per_se = 4;
718                 rdev->config.cayman.max_texture_channel_caches = 8;
719                 rdev->config.cayman.max_gprs = 256;
720                 rdev->config.cayman.max_threads = 256;
721                 rdev->config.cayman.max_gs_threads = 32;
722                 rdev->config.cayman.max_stack_entries = 512;
723                 rdev->config.cayman.sx_num_of_sets = 8;
724                 rdev->config.cayman.sx_max_export_size = 256;
725                 rdev->config.cayman.sx_max_export_pos_size = 64;
726                 rdev->config.cayman.sx_max_export_smx_size = 192;
727                 rdev->config.cayman.max_hw_contexts = 8;
728                 rdev->config.cayman.sq_num_cf_insts = 2;
729
730                 rdev->config.cayman.sc_prim_fifo_size = 0x100;
731                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
732                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
733                 gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
734                 break;
735         case CHIP_ARUBA:
736         default:
737                 rdev->config.cayman.max_shader_engines = 1;
738                 rdev->config.cayman.max_pipes_per_simd = 4;
739                 rdev->config.cayman.max_tile_pipes = 2;
740                 if ((rdev->pdev->device == 0x9900) ||
741                     (rdev->pdev->device == 0x9901) ||
742                     (rdev->pdev->device == 0x9905) ||
743                     (rdev->pdev->device == 0x9906) ||
744                     (rdev->pdev->device == 0x9907) ||
745                     (rdev->pdev->device == 0x9908) ||
746                     (rdev->pdev->device == 0x9909) ||
747                     (rdev->pdev->device == 0x990B) ||
748                     (rdev->pdev->device == 0x990C) ||
749                     (rdev->pdev->device == 0x990F) ||
750                     (rdev->pdev->device == 0x9910) ||
751                     (rdev->pdev->device == 0x9917) ||
752                     (rdev->pdev->device == 0x9999) ||
753                     (rdev->pdev->device == 0x999C)) {
754                         rdev->config.cayman.max_simds_per_se = 6;
755                         rdev->config.cayman.max_backends_per_se = 2;
756                         rdev->config.cayman.max_hw_contexts = 8;
757                         rdev->config.cayman.sx_max_export_size = 256;
758                         rdev->config.cayman.sx_max_export_pos_size = 64;
759                         rdev->config.cayman.sx_max_export_smx_size = 192;
760                 } else if ((rdev->pdev->device == 0x9903) ||
761                            (rdev->pdev->device == 0x9904) ||
762                            (rdev->pdev->device == 0x990A) ||
763                            (rdev->pdev->device == 0x990D) ||
764                            (rdev->pdev->device == 0x990E) ||
765                            (rdev->pdev->device == 0x9913) ||
766                            (rdev->pdev->device == 0x9918) ||
767                            (rdev->pdev->device == 0x999D)) {
768                         rdev->config.cayman.max_simds_per_se = 4;
769                         rdev->config.cayman.max_backends_per_se = 2;
770                         rdev->config.cayman.max_hw_contexts = 8;
771                         rdev->config.cayman.sx_max_export_size = 256;
772                         rdev->config.cayman.sx_max_export_pos_size = 64;
773                         rdev->config.cayman.sx_max_export_smx_size = 192;
774                 } else if ((rdev->pdev->device == 0x9919) ||
775                            (rdev->pdev->device == 0x9990) ||
776                            (rdev->pdev->device == 0x9991) ||
777                            (rdev->pdev->device == 0x9994) ||
778                            (rdev->pdev->device == 0x9995) ||
779                            (rdev->pdev->device == 0x9996) ||
780                            (rdev->pdev->device == 0x999A) ||
781                            (rdev->pdev->device == 0x99A0)) {
782                         rdev->config.cayman.max_simds_per_se = 3;
783                         rdev->config.cayman.max_backends_per_se = 1;
784                         rdev->config.cayman.max_hw_contexts = 4;
785                         rdev->config.cayman.sx_max_export_size = 128;
786                         rdev->config.cayman.sx_max_export_pos_size = 32;
787                         rdev->config.cayman.sx_max_export_smx_size = 96;
788                 } else {
789                         rdev->config.cayman.max_simds_per_se = 2;
790                         rdev->config.cayman.max_backends_per_se = 1;
791                         rdev->config.cayman.max_hw_contexts = 4;
792                         rdev->config.cayman.sx_max_export_size = 128;
793                         rdev->config.cayman.sx_max_export_pos_size = 32;
794                         rdev->config.cayman.sx_max_export_smx_size = 96;
795                 }
796                 rdev->config.cayman.max_texture_channel_caches = 2;
797                 rdev->config.cayman.max_gprs = 256;
798                 rdev->config.cayman.max_threads = 256;
799                 rdev->config.cayman.max_gs_threads = 32;
800                 rdev->config.cayman.max_stack_entries = 512;
801                 rdev->config.cayman.sx_num_of_sets = 8;
802                 rdev->config.cayman.sq_num_cf_insts = 2;
803
804                 rdev->config.cayman.sc_prim_fifo_size = 0x40;
805                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
806                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
807                 gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
808                 break;
809         }
810
811         /* Initialize HDP */
812         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
813                 WREG32((0x2c14 + j), 0x00000000);
814                 WREG32((0x2c18 + j), 0x00000000);
815                 WREG32((0x2c1c + j), 0x00000000);
816                 WREG32((0x2c20 + j), 0x00000000);
817                 WREG32((0x2c24 + j), 0x00000000);
818         }
819
820         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
821
822         evergreen_fix_pci_max_read_req_size(rdev);
823
824         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
825         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
826
827         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
828         rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
829         if (rdev->config.cayman.mem_row_size_in_kb > 4)
830                 rdev->config.cayman.mem_row_size_in_kb = 4;
831         /* XXX use MC settings? */
832         rdev->config.cayman.shader_engine_tile_size = 32;
833         rdev->config.cayman.num_gpus = 1;
834         rdev->config.cayman.multi_gpu_tile_size = 64;
835
836         tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
837         rdev->config.cayman.num_tile_pipes = (1 << tmp);
838         tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
839         rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
840         tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
841         rdev->config.cayman.num_shader_engines = tmp + 1;
842         tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
843         rdev->config.cayman.num_gpus = tmp + 1;
844         tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
845         rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
846         tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
847         rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
848
849
850         /* setup tiling info dword.  gb_addr_config is not adequate since it does
851          * not have bank info, so create a custom tiling dword.
852          * bits 3:0   num_pipes
853          * bits 7:4   num_banks
854          * bits 11:8  group_size
855          * bits 15:12 row_size
856          */
857         rdev->config.cayman.tile_config = 0;
858         switch (rdev->config.cayman.num_tile_pipes) {
859         case 1:
860         default:
861                 rdev->config.cayman.tile_config |= (0 << 0);
862                 break;
863         case 2:
864                 rdev->config.cayman.tile_config |= (1 << 0);
865                 break;
866         case 4:
867                 rdev->config.cayman.tile_config |= (2 << 0);
868                 break;
869         case 8:
870                 rdev->config.cayman.tile_config |= (3 << 0);
871                 break;
872         }
873
874         /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
875         if (rdev->flags & RADEON_IS_IGP)
876                 rdev->config.cayman.tile_config |= 1 << 4;
877         else {
878                 switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
879                 case 0: /* four banks */
880                         rdev->config.cayman.tile_config |= 0 << 4;
881                         break;
882                 case 1: /* eight banks */
883                         rdev->config.cayman.tile_config |= 1 << 4;
884                         break;
885                 case 2: /* sixteen banks */
886                 default:
887                         rdev->config.cayman.tile_config |= 2 << 4;
888                         break;
889                 }
890         }
891         rdev->config.cayman.tile_config |=
892                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
893         rdev->config.cayman.tile_config |=
894                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
895
896         tmp = 0;
897         for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
898                 u32 rb_disable_bitmap;
899
900                 WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
901                 WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
902                 rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
903                 tmp <<= 4;
904                 tmp |= rb_disable_bitmap;
905         }
906         /* enabled rb are just the one not disabled :) */
907         disabled_rb_mask = tmp;
908         tmp = 0;
909         for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
910                 tmp |= (1 << i);
911         /* if all the backends are disabled, fix it up here */
912         if ((disabled_rb_mask & tmp) == tmp) {
913                 for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
914                         disabled_rb_mask &= ~(1 << i);
915         }
916
917         WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
918         WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
919
920         WREG32(GB_ADDR_CONFIG, gb_addr_config);
921         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
922         if (ASIC_IS_DCE6(rdev))
923                 WREG32(DMIF_ADDR_CALC, gb_addr_config);
924         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
925         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
926         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
927         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
928         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
929         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
930
931         if ((rdev->config.cayman.max_backends_per_se == 1) &&
932             (rdev->flags & RADEON_IS_IGP)) {
933                 if ((disabled_rb_mask & 3) == 1) {
934                         /* RB0 disabled, RB1 enabled */
935                         tmp = 0x11111111;
936                 } else {
937                         /* RB1 disabled, RB0 enabled */
938                         tmp = 0x00000000;
939                 }
940         } else {
941                 tmp = gb_addr_config & NUM_PIPES_MASK;
942                 tmp = r6xx_remap_render_backend(rdev, tmp,
943                                                 rdev->config.cayman.max_backends_per_se *
944                                                 rdev->config.cayman.max_shader_engines,
945                                                 CAYMAN_MAX_BACKENDS, disabled_rb_mask);
946         }
947         WREG32(GB_BACKEND_MAP, tmp);
948
949         cgts_tcc_disable = 0xffff0000;
950         for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
951                 cgts_tcc_disable &= ~(1 << (16 + i));
952         WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
953         WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
954         WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
955         WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
956
957         /* reprogram the shader complex */
958         cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
959         for (i = 0; i < 16; i++)
960                 WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
961         WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
962
963         /* set HW defaults for 3D engine */
964         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
965
966         sx_debug_1 = RREG32(SX_DEBUG_1);
967         sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
968         WREG32(SX_DEBUG_1, sx_debug_1);
969
970         smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
971         smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
972         smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
973         WREG32(SMX_DC_CTL0, smx_dc_ctl0);
974
975         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
976
977         /* need to be explicitly zero-ed */
978         WREG32(VGT_OFFCHIP_LDS_BASE, 0);
979         WREG32(SQ_LSTMP_RING_BASE, 0);
980         WREG32(SQ_HSTMP_RING_BASE, 0);
981         WREG32(SQ_ESTMP_RING_BASE, 0);
982         WREG32(SQ_GSTMP_RING_BASE, 0);
983         WREG32(SQ_VSTMP_RING_BASE, 0);
984         WREG32(SQ_PSTMP_RING_BASE, 0);
985
986         WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
987
988         WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
989                                         POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
990                                         SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
991
992         WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
993                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
994                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
995
996
997         WREG32(VGT_NUM_INSTANCES, 1);
998
999         WREG32(CP_PERFMON_CNTL, 0);
1000
1001         WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1002                                   FETCH_FIFO_HIWATER(0x4) |
1003                                   DONE_FIFO_HIWATER(0xe0) |
1004                                   ALU_UPDATE_FIFO_HIWATER(0x8)));
1005
1006         WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1007         WREG32(SQ_CONFIG, (VC_ENABLE |
1008                            EXPORT_SRC_C |
1009                            GFX_PRIO(0) |
1010                            CS1_PRIO(0) |
1011                            CS2_PRIO(1)));
1012         WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1013
1014         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1015                                           FORCE_EOV_MAX_REZ_CNT(255)));
1016
1017         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1018                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1019
1020         WREG32(VGT_GS_VERTEX_REUSE, 16);
1021         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1022
1023         WREG32(CB_PERF_CTR0_SEL_0, 0);
1024         WREG32(CB_PERF_CTR0_SEL_1, 0);
1025         WREG32(CB_PERF_CTR1_SEL_0, 0);
1026         WREG32(CB_PERF_CTR1_SEL_1, 0);
1027         WREG32(CB_PERF_CTR2_SEL_0, 0);
1028         WREG32(CB_PERF_CTR2_SEL_1, 0);
1029         WREG32(CB_PERF_CTR3_SEL_0, 0);
1030         WREG32(CB_PERF_CTR3_SEL_1, 0);
1031
1032         tmp = RREG32(HDP_MISC_CNTL);
1033         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1034         WREG32(HDP_MISC_CNTL, tmp);
1035
1036         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1037         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1038
1039         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1040
1041         udelay(50);
1042 }
1043
1044 /*
1045  * GART
1046  */
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLB
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read cache and then requests a TLB invalidate
 * for VM context 0, which maps the GART aperture.  The HDP flush
 * must happen first so that any host writes reach memory before
 * the GPU re-walks the page table.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7; only context 0 (GART) here */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1055
/**
 * cayman_pcie_gart_enable - bring up the PCIE GART (VM context 0)
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2
 * page-table-cache controls, points VM context 0 at the GART
 * aperture, gives contexts 1-7 a valid placeholder page table,
 * enables fault reporting on them, and finally flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure
 * (-EINVAL if the GART table object was never allocated).
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the table in VRAM and re-populate it with current mappings */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: flat (depth 0) table covering the GTT range,
	 * with faults redirected to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): registers 0x15D4-0x15DC are cleared without named
	 * defines; their purpose is not documented in this file — confirm
	 * against the hardware register spec before touching. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7: two-level (depth 1) tables, with every fault
	 * class both raising an interrupt and returning the dummy page */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1136
/**
 * cayman_pcie_gart_disable - tear down the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, drops the L1 TLB and L2 cache enables
 * (mirroring the setup in cayman_pcie_gart_enable()), and unpins
 * the page table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: same fields as enable, minus ENABLE_L1_TLB
	 * and ENABLE_ADVANCED_DRIVER_MODEL */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE intentionally omitted */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1156
/**
 * cayman_pcie_gart_fini - final GART teardown
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, then frees the page-table BO and
 * the GART bookkeeping structures, in that order.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1163
/**
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one CP ring
 * @rdev: radeon_device pointer
 * @ring: CP ring index (0-2); only the low 2 bits are used
 * @cp_int_cntl: value to write to CP_INT_CNTL
 *
 * CP_INT_CNTL is banked per ring: the low 2 bits of SRBM_GFX_CNTL
 * select which ring's copy the subsequent register access hits, so
 * the bank select must be written immediately before CP_INT_CNTL.
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
			      int ring, u32 cp_int_cntl)
{
	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
	WREG32(CP_INT_CNTL, cp_int_cntl);
}
1172
1173 /*
1174  * CP.
1175  */
/**
 * cayman_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC to flush the read caches for this VMID,
 * then an EVENT_WRITE_EOP that writes the fence sequence number
 * to the fence driver's GPU address and (per the DATA_SEL/INT_SEL
 * encoding — see nid.h) raises an interrupt once prior work and
 * the cache flush have completed.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
		PACKET3_SH_ACTION_ENA;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
	radeon_ring_write(ring, 0xFFFFFFFF); /* full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1198
/**
 * cayman_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * Forces DX10/11 mode, optionally records the expected read pointer
 * into the ring's scratch register (used for debugging ring state),
 * emits an INDIRECT_BUFFER packet pointing at the IB (tagged with
 * its VM id), and finally a SURFACE_SYNC to flush the read caches
 * for that VMID once the IB has been consumed.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
		PACKET3_SH_ACTION_ENA;

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 + 4 + 8 = dwords emitted by this function after this
		 * point: mode-control words were already written above */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* byte-swap the IB fetch */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
}
1234
/**
 * cayman_uvd_semaphore_emit - emit a semaphore command on the UVD ring
 * @rdev: radeon_device pointer
 * @ring: UVD ring
 * @semaphore: semaphore object (gpu_addr is 8-byte aligned; the
 *	address is split into two 20-bit register fields below)
 * @emit_wait: true to emit a semaphore wait, false for a signal
 *
 * Programs the semaphore address registers, then issues the
 * semaphore command; bit 7 is always set and bit 0 selects
 * wait (1) vs. signal (0).
 */
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
}
1251
/**
 * cayman_cp_enable - enable/disable the command processor
 * @rdev: radeon_device pointer
 * @enable: true to run the CP, false to halt it
 *
 * Enabling clears CP_ME_CNTL (releases the ME/PFP halt bits).
 * Disabling restores the full visible VRAM size for TTM (the CP
 * can no longer be used to move buffers), halts the ME and PFP,
 * masks scratch-register writeback, and marks the GFX ring not
 * ready so no further work is submitted.
 */
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
		WREG32(SCRATCH_UMSK, 0);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
}
1263
/**
 * cayman_cp_load_microcode - upload PFP and ME microcode to the CP
 * @rdev: radeon_device pointer
 *
 * Halts the CP, then streams the big-endian firmware images into
 * the PFP and ME ucode RAMs through their indirect address/data
 * register pairs (the address auto-increments on each data write;
 * it is reset to 0 before and after each upload).
 *
 * Returns 0 on success, -EINVAL if the firmware was never loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	/* the CP must be halted while its ucode RAM is written */
	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
1290
/**
 * cayman_cp_start - initialize the CP and emit the initial GPU state
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the GFX ring, enables the CP, then plays
 * back the golden register state (cayman_default_state from
 * cayman_blit_shaders.h) wrapped in CLEAR_STATE preamble packets,
 * followed by a few fixed register writes.  Only the GFX ring
 * (cp0) is initialized here; see the XXX below for the others.
 *
 * Returns 0 on success, negative error code if a ring lock fails.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* 19 extra dwords: the packets emitted below around the state dump */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /*  */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1356
/**
 * cayman_cp_fini - tear down the command processor
 * @rdev: radeon_device pointer
 *
 * Halts the CP, frees the GFX ring buffer, and releases the
 * scratch register that was used for read-pointer save/restore.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1364
/**
 * cayman_cp_resume - reset and restart all three CP rings
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP (and the graphics blocks that must be reset
 * with it), programs ring size, writeback read-pointer address, and
 * base address for each of the three CP rings, zeroes their read
 * and write pointers, then starts the CP and ring-tests the GFX
 * ring.  Rings CP1/CP2 are programmed but left not-ready here.
 *
 * Returns 0 on success, negative error code if the ring test fails.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register tables, indexed in lock-step with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size (log2 of size in dwords) */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1473
1474 /*
1475  * DMA
1476  * Starting with R600, the GPU has an asynchronous
1477  * DMA engine.  The programming model is very similar
1478  * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
1481  * It supports copying data, writing embedded data,
1482  * solid fills, and a number of other things.  It also
1483  * has support for tiling/detiling of buffers.
1484  * Cayman and newer support two asynchronous DMA engines.
1485  */
/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		/* With writeback enabled, emit a WRITE packet updating the
		 * ring's next_rptr slot.  next_rptr is the wptr after this
		 * 4-dword WRITE packet, the NOP padding added below (the IB
		 * packet must start at wptr % 8 == 5), and the IB packet's
		 * remaining 3 dwords.
		 */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		/* 64-bit destination address, split low/high; low dword-aligned */
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	/* IB packet: VM id (0 for the kernel VM), 256-byte-aligned IB address,
	 * then length in dwords plus address high bits */
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1520
1521 /**
1522  * cayman_dma_stop - stop the async dma engines
1523  *
1524  * @rdev: radeon_device pointer
1525  *
1526  * Stop the async dma engines (cayman-SI).
1527  */
1528 void cayman_dma_stop(struct radeon_device *rdev)
1529 {
1530         u32 rb_cntl;
1531
1532         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1533
1534         /* dma0 */
1535         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1536         rb_cntl &= ~DMA_RB_ENABLE;
1537         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1538
1539         /* dma1 */
1540         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1541         rb_cntl &= ~DMA_RB_ENABLE;
1542         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1543
1544         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1545         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1546 }
1547
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them. (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);	/* read back before the delay */
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* program both engines: i == 0 -> dma0, i == 1 -> dma1 */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		/* clear the semaphore timer controls */
		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		/* ring base is in units of 256 bytes (addr >> 8) */
		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		/* mask the context-empty interrupt */
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		/* hardware rptr is in bytes, software copy in dwords */
		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* finally turn the ring on */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1638
1639 /**
1640  * cayman_dma_fini - tear down the async dma engines
1641  *
1642  * @rdev: radeon_device pointer
1643  *
1644  * Stop the async dma engines and free the rings (cayman-SI).
1645  */
1646 void cayman_dma_fini(struct radeon_device *rdev)
1647 {
1648         cayman_dma_stop(rdev);
1649         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1650         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1651 }
1652
/* Read the GPU status registers and translate any busy/hung units into a
 * RADEON_RESET_* mask for cayman_gpu_soft_reset().  Returns 0 when all
 * checked blocks are idle.
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1730
/* Soft-reset the GPU blocks selected by reset_mask (RADEON_RESET_* bits).
 * Halts the CP and the requested DMA engines, stops the MC around the
 * reset, pulses the GRBM/SRBM soft-reset registers, then restores the MC.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump the fault state before touching anything */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM reset mask from the engine-level request */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* build the SRBM reset mask */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* no MC soft reset on IGPs (no dedicated MC / MC ucode there) */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse GRBM soft reset: assert, wait, deassert; each write is
	 * followed by a read-back before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same assert/deassert pulse for the SRBM */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1862
1863 int cayman_asic_reset(struct radeon_device *rdev)
1864 {
1865         u32 reset_mask;
1866
1867         reset_mask = cayman_gpu_check_soft_reset(rdev);
1868
1869         if (reset_mask)
1870                 r600_set_bios_scratch_engine_hung(rdev, true);
1871
1872         cayman_gpu_soft_reset(rdev, reset_mask);
1873
1874         reset_mask = cayman_gpu_check_soft_reset(rdev);
1875
1876         if (!reset_mask)
1877                 r600_set_bios_scratch_engine_hung(rdev, false);
1878
1879         return 0;
1880 }
1881
1882 /**
1883  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1884  *
1885  * @rdev: radeon_device pointer
1886  * @ring: radeon_ring structure holding ring information
1887  *
1888  * Check if the GFX engine is locked up.
1889  * Returns true if the engine appears to be locked up, false if not.
1890  */
1891 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1892 {
1893         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1894
1895         if (!(reset_mask & (RADEON_RESET_GFX |
1896                             RADEON_RESET_COMPUTE |
1897                             RADEON_RESET_CP))) {
1898                 radeon_ring_lockup_update(ring);
1899                 return false;
1900         }
1901         /* force CP activities */
1902         radeon_ring_force_activity(rdev, ring);
1903         return radeon_ring_test_lockup(rdev, ring);
1904 }
1905
1906 /**
1907  * cayman_dma_is_lockup - Check if the DMA engine is locked up
1908  *
1909  * @rdev: radeon_device pointer
1910  * @ring: radeon_ring structure holding ring information
1911  *
1912  * Check if the async DMA engine is locked up.
1913  * Returns true if the engine appears to be locked up, false if not.
1914  */
1915 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1916 {
1917         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1918         u32 mask;
1919
1920         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1921                 mask = RADEON_RESET_DMA;
1922         else
1923                 mask = RADEON_RESET_DMA1;
1924
1925         if (!(reset_mask & mask)) {
1926                 radeon_ring_lockup_update(ring);
1927                 return false;
1928         }
1929         /* force ring activities */
1930         radeon_ring_force_activity(rdev, ring);
1931         return radeon_ring_test_lockup(rdev, ring);
1932 }
1933
/* Bring the asic up: program the MC, load microcode, enable the GART,
 * allocate writeback/fence/ring resources, start all rings (GFX, the two
 * compute CPs, both DMA engines, UVD) and the IB pool and VM manager.
 * Shared by cayman_init() and cayman_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);

	evergreen_mc_program(rdev);

	/* load microcode if not already loaded; IGPs (TN) have no MC ucode */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* blitter failure is non-fatal: fall back to CPU copies */
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		r = si_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable its ring by zeroing the size */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* allocate+map the ring buffers: GFX first, then the DMA engines */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2116
2117 int cayman_resume(struct radeon_device *rdev)
2118 {
2119         int r;
2120
2121         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2122          * posting will perform necessary task to bring back GPU into good
2123          * shape.
2124          */
2125         /* post card */
2126         atom_asic_init(rdev->mode_info.atom_context);
2127
2128         /* init golden registers */
2129         ni_init_golden_registers(rdev);
2130
2131         rdev->accel_working = true;
2132         r = cayman_startup(rdev);
2133         if (r) {
2134                 DRM_ERROR("cayman startup failed on resume\n");
2135                 rdev->accel_working = false;
2136                 return r;
2137         }
2138         return r;
2139 }
2140
/* Quiesce the asic for suspend: stop audio, the VM manager, the CP and
 * DMA engines and UVD, then tear down interrupts, writeback and the
 * GART.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);	/* halt the command processor */
	cayman_dma_stop(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2154
/* The plan is to move initialization into this function and use helper
 * functions so that radeon_device_init does little more than call the
 * asic-specific function.  This should also allow removing a bunch of
 * callback functions like vram_info.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the rings: 1MB GFX ring, 64KB per DMA engine */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional: only set up its ring if init succeeds */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	/* startup failure disables acceleration but is not fatal for init */
	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2266
/* Full teardown for cayman/TN: release everything set up by
 * cayman_init()/cayman_startup(), then free the cached BIOS copy.
 */
void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	if (rdev->flags & RADEON_IS_IGP)
		si_rlc_fini(rdev);	/* rlc BOs are only allocated on IGPs */
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* guard against use-after-free of the BIOS copy */
}
2290
2291 /*
2292  * vm
2293  */
2294 int cayman_vm_init(struct radeon_device *rdev)
2295 {
2296         /* number of VMs */
2297         rdev->vm_manager.nvm = 8;
2298         /* base offset of vram pages */
2299         if (rdev->flags & RADEON_IS_IGP) {
2300                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2301                 tmp <<= 22;
2302                 rdev->vm_manager.vram_base_offset = tmp;
2303         } else
2304                 rdev->vm_manager.vram_base_offset = 0;
2305         return 0;
2306 }
2307
/**
 * cayman_vm_fini - tear down VM manager state (cayman/TN)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() only fills in fields of
 * rdev->vm_manager and allocates nothing, so there is nothing to free.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2311
/* R600-family hardware page table entry bits.  These are what
 * cayman_vm_page_flags() translates the driver-generic
 * RADEON_VM_PAGE_* flags into before cayman_vm_set_page() writes
 * the entries.
 */
#define R600_ENTRY_VALID   (1 << 0)
#define R600_PTE_SYSTEM    (1 << 1)	/* set for RADEON_VM_PAGE_SYSTEM (GART) pages */
#define R600_PTE_SNOOPED   (1 << 2)	/* only applied to SYSTEM pages marked SNOOPED */
#define R600_PTE_READABLE  (1 << 5)
#define R600_PTE_WRITEABLE (1 << 6)
2317
2318 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2319 {
2320         uint32_t r600_flags = 0;
2321         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2322         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2323         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2324         if (flags & RADEON_VM_PAGE_SYSTEM) {
2325                 r600_flags |= R600_PTE_SYSTEM;
2326                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2327         }
2328         return r600_flags;
2329 }
2330
/**
 * cayman_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags (RADEON_VM_PAGE_*)
 *
 * Update the page tables using the CP (cayman/TN).  Packets are
 * emitted into @ib; which engine they target is chosen by
 * rdev->asic->vm.pt_ring_index (GFX ring -> ME_WRITE packets,
 * otherwise DMA packets).
 */
void cayman_vm_set_page(struct radeon_device *rdev,
			struct radeon_ib *ib,
			uint64_t pe,
			uint64_t addr, unsigned count,
			uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: batch the entries into ME_WRITE packets of at
		 * most 0x3FFF dwords each */
		while (count) {
			ndw = 1 + count * 2;	/* two dwords per entry, plus one */
			if (ndw > 0x3FFF)
				ndw = 0x3FFF;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;	/* keep page-aligned bits */
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA path */
		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
		    (count == 1)) {
			/* entries must be written one at a time, since
			 * consecutive system pages are not physically contiguous */
			while (count) {
				ndw = count * 2;	/* two dwords per entry */
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
			/* pad the IB to an 8-dword boundary with NOPs */
			while (ib->length_dw & 0x7)
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
		} else {
			/* VRAM is contiguous, so the engine can generate the
			 * entries itself from a base value and an increment */
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;		/* ndw dwords written -> ndw*4 bytes */
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad the IB to an 8-dword boundary with NOPs (no-op if the
		 * branch above already aligned it) */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
	}
}
2436
/**
 * cayman_vm_flush - vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit the flush on
 * @vm: VM to flush; a NULL vm makes this a no-op
 *
 * Update the page table base and flush the VM TLB
 * using the CP (cayman-si).
 */
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point this VM context's page table base at the page directory;
	 * the per-context registers are 4 bytes apart (vm->id << 2) */
	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
	radeon_ring_write(ring, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
2467
/**
 * cayman_dma_vm_flush - vm flush using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: VM to flush; a NULL vm makes this a no-op
 *
 * Sets the VM context's page table base, flushes the HDP cache and
 * invalidates the context's TLB.  Register writes are performed via
 * SRBM_WRITE packets since the DMA engine cannot write registers
 * directly the way the CP does.
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* NOTE(review): (0xf << 16) presumably carries the byte-enable
	 * mask for the SRBM register write; register offsets are given in
	 * dwords (>> 2) -- confirm against the SRBM_WRITE packet spec */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2489