Merge remote-tracking branch 'lsk/v3.10/topic/big.LITTLE' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / ni.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "nid.h"
33 #include "atom.h"
34 #include "ni_reg.h"
35 #include "cayman_blit_shaders.h"
36
37 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
38 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
39 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
40 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
41 extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
42 extern void evergreen_mc_program(struct radeon_device *rdev);
43 extern void evergreen_irq_suspend(struct radeon_device *rdev);
44 extern int evergreen_mc_init(struct radeon_device *rdev);
45 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
46 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
47 extern void si_rlc_fini(struct radeon_device *rdev);
48 extern int si_rlc_init(struct radeon_device *rdev);
49
50 #define EVERGREEN_PFP_UCODE_SIZE 1120
51 #define EVERGREEN_PM4_UCODE_SIZE 1376
52 #define EVERGREEN_RLC_UCODE_SIZE 768
53 #define BTC_MC_UCODE_SIZE 6024
54
55 #define CAYMAN_PFP_UCODE_SIZE 2176
56 #define CAYMAN_PM4_UCODE_SIZE 2176
57 #define CAYMAN_RLC_UCODE_SIZE 1024
58 #define CAYMAN_MC_UCODE_SIZE 6037
59
60 #define ARUBA_RLC_UCODE_SIZE 1536
61
62 /* Firmware Names */
63 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
64 MODULE_FIRMWARE("radeon/BARTS_me.bin");
65 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
66 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
67 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
68 MODULE_FIRMWARE("radeon/TURKS_me.bin");
69 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
70 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
71 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
72 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
73 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
74 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
75 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
76 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
77 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
78 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
79 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
80
81
/* Additional Cayman golden register settings.
 * Table of {register offset, AND mask, OR value} triples consumed by
 * radeon_program_register_sequence() in ni_init_golden_registers().
 */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
91
/* Cayman golden register settings.
 * Table of {register offset, AND mask, OR value} triples consumed by
 * radeon_program_register_sequence() in ni_init_golden_registers().
 */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
132
/* Additional golden register settings shared by all ARUBA (Trinity)
 * variants; {register offset, AND mask, OR value} triples.
 */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
140
/* Golden register settings for the DVST subset of ARUBA parts
 * (device IDs listed in ni_init_golden_registers());
 * {register offset, AND mask, OR value} triples.
 */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
198
/* Golden register settings for the non-DVST ("scrapper") ARUBA parts;
 * {register offset, AND mask, OR value} triples.
 * NOTE(review): most entries appear twice in a row — presumably a
 * deliberate double-write required by this silicon revision; confirm
 * against AMD register init docs before deduplicating.
 */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
305
306 static void ni_init_golden_registers(struct radeon_device *rdev)
307 {
308         switch (rdev->family) {
309         case CHIP_CAYMAN:
310                 radeon_program_register_sequence(rdev,
311                                                  cayman_golden_registers,
312                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
313                 radeon_program_register_sequence(rdev,
314                                                  cayman_golden_registers2,
315                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
316                 break;
317         case CHIP_ARUBA:
318                 if ((rdev->pdev->device == 0x9900) ||
319                     (rdev->pdev->device == 0x9901) ||
320                     (rdev->pdev->device == 0x9903) ||
321                     (rdev->pdev->device == 0x9904) ||
322                     (rdev->pdev->device == 0x9905) ||
323                     (rdev->pdev->device == 0x9906) ||
324                     (rdev->pdev->device == 0x9907) ||
325                     (rdev->pdev->device == 0x9908) ||
326                     (rdev->pdev->device == 0x9909) ||
327                     (rdev->pdev->device == 0x990A) ||
328                     (rdev->pdev->device == 0x990B) ||
329                     (rdev->pdev->device == 0x990C) ||
330                     (rdev->pdev->device == 0x990D) ||
331                     (rdev->pdev->device == 0x990E) ||
332                     (rdev->pdev->device == 0x990F) ||
333                     (rdev->pdev->device == 0x9910) ||
334                     (rdev->pdev->device == 0x9913) ||
335                     (rdev->pdev->device == 0x9917) ||
336                     (rdev->pdev->device == 0x9918)) {
337                         radeon_program_register_sequence(rdev,
338                                                          dvst_golden_registers,
339                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
340                         radeon_program_register_sequence(rdev,
341                                                          dvst_golden_registers2,
342                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
343                 } else {
344                         radeon_program_register_sequence(rdev,
345                                                          scrapper_golden_registers,
346                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
347                         radeon_program_register_sequence(rdev,
348                                                          dvst_golden_registers2,
349                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
350                 }
351                 break;
352         default:
353                 break;
354         }
355 }
356
357 #define BTC_IO_MC_REGS_SIZE 29
358
/* BARTS MC I/O register init: {MC_SEQ_IO_DEBUG index, value} pairs
 * written before the MC ucode is loaded in ni_mc_load_microcode().
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
390
/* TURKS MC I/O register init: {MC_SEQ_IO_DEBUG index, value} pairs;
 * identical to the BARTS table except for the final (0x9f) entry.
 */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
422
/* CAICOS MC I/O register init: {MC_SEQ_IO_DEBUG index, value} pairs;
 * identical to the BARTS table except for the final (0x9f) entry.
 */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
454
/* CAYMAN MC I/O register init: {MC_SEQ_IO_DEBUG index, value} pairs;
 * identical to the BARTS table except for the final (0x9f) entry.
 */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
486
487 int ni_mc_load_microcode(struct radeon_device *rdev)
488 {
489         const __be32 *fw_data;
490         u32 mem_type, running, blackout = 0;
491         u32 *io_mc_regs;
492         int i, ucode_size, regs_size;
493
494         if (!rdev->mc_fw)
495                 return -EINVAL;
496
497         switch (rdev->family) {
498         case CHIP_BARTS:
499                 io_mc_regs = (u32 *)&barts_io_mc_regs;
500                 ucode_size = BTC_MC_UCODE_SIZE;
501                 regs_size = BTC_IO_MC_REGS_SIZE;
502                 break;
503         case CHIP_TURKS:
504                 io_mc_regs = (u32 *)&turks_io_mc_regs;
505                 ucode_size = BTC_MC_UCODE_SIZE;
506                 regs_size = BTC_IO_MC_REGS_SIZE;
507                 break;
508         case CHIP_CAICOS:
509         default:
510                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
511                 ucode_size = BTC_MC_UCODE_SIZE;
512                 regs_size = BTC_IO_MC_REGS_SIZE;
513                 break;
514         case CHIP_CAYMAN:
515                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
516                 ucode_size = CAYMAN_MC_UCODE_SIZE;
517                 regs_size = BTC_IO_MC_REGS_SIZE;
518                 break;
519         }
520
521         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
522         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
523
524         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
525                 if (running) {
526                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
527                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
528                 }
529
530                 /* reset the engine and set to writable */
531                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
532                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
533
534                 /* load mc io regs */
535                 for (i = 0; i < regs_size; i++) {
536                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
537                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
538                 }
539                 /* load the MC ucode */
540                 fw_data = (const __be32 *)rdev->mc_fw->data;
541                 for (i = 0; i < ucode_size; i++)
542                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
543
544                 /* put the engine back into the active state */
545                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
546                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
547                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
548
549                 /* wait for training to complete */
550                 for (i = 0; i < rdev->usec_timeout; i++) {
551                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
552                                 break;
553                         udelay(1);
554                 }
555
556                 if (running)
557                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
558         }
559
560         return 0;
561 }
562
563 int ni_init_microcode(struct radeon_device *rdev)
564 {
565         struct platform_device *pdev;
566         const char *chip_name;
567         const char *rlc_chip_name;
568         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
569         char fw_name[30];
570         int err;
571
572         DRM_DEBUG("\n");
573
574         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
575         err = IS_ERR(pdev);
576         if (err) {
577                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
578                 return -EINVAL;
579         }
580
581         switch (rdev->family) {
582         case CHIP_BARTS:
583                 chip_name = "BARTS";
584                 rlc_chip_name = "BTC";
585                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
586                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
587                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
588                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
589                 break;
590         case CHIP_TURKS:
591                 chip_name = "TURKS";
592                 rlc_chip_name = "BTC";
593                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
594                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
595                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
596                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
597                 break;
598         case CHIP_CAICOS:
599                 chip_name = "CAICOS";
600                 rlc_chip_name = "BTC";
601                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
602                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
603                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
604                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
605                 break;
606         case CHIP_CAYMAN:
607                 chip_name = "CAYMAN";
608                 rlc_chip_name = "CAYMAN";
609                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
610                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
611                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
612                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
613                 break;
614         case CHIP_ARUBA:
615                 chip_name = "ARUBA";
616                 rlc_chip_name = "ARUBA";
617                 /* pfp/me same size as CAYMAN */
618                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
619                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
620                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
621                 mc_req_size = 0;
622                 break;
623         default: BUG();
624         }
625
626         DRM_INFO("Loading %s Microcode\n", chip_name);
627
628         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
629         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
630         if (err)
631                 goto out;
632         if (rdev->pfp_fw->size != pfp_req_size) {
633                 printk(KERN_ERR
634                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
635                        rdev->pfp_fw->size, fw_name);
636                 err = -EINVAL;
637                 goto out;
638         }
639
640         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
641         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
642         if (err)
643                 goto out;
644         if (rdev->me_fw->size != me_req_size) {
645                 printk(KERN_ERR
646                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
647                        rdev->me_fw->size, fw_name);
648                 err = -EINVAL;
649         }
650
651         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
652         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
653         if (err)
654                 goto out;
655         if (rdev->rlc_fw->size != rlc_req_size) {
656                 printk(KERN_ERR
657                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
658                        rdev->rlc_fw->size, fw_name);
659                 err = -EINVAL;
660         }
661
662         /* no MC ucode on TN */
663         if (!(rdev->flags & RADEON_IS_IGP)) {
664                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
665                 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
666                 if (err)
667                         goto out;
668                 if (rdev->mc_fw->size != mc_req_size) {
669                         printk(KERN_ERR
670                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
671                                rdev->mc_fw->size, fw_name);
672                         err = -EINVAL;
673                 }
674         }
675 out:
676         platform_device_unregister(pdev);
677
678         if (err) {
679                 if (err != -EINVAL)
680                         printk(KERN_ERR
681                                "ni_cp: Failed to load firmware \"%s\"\n",
682                                fw_name);
683                 release_firmware(rdev->pfp_fw);
684                 rdev->pfp_fw = NULL;
685                 release_firmware(rdev->me_fw);
686                 rdev->me_fw = NULL;
687                 release_firmware(rdev->rlc_fw);
688                 rdev->rlc_fw = NULL;
689                 release_firmware(rdev->mc_fw);
690                 rdev->mc_fw = NULL;
691         }
692         return err;
693 }
694
695 /*
696  * Core functions
697  */
698 static void cayman_gpu_init(struct radeon_device *rdev)
699 {
700         u32 gb_addr_config = 0;
701         u32 mc_shared_chmap, mc_arb_ramcfg;
702         u32 cgts_tcc_disable;
703         u32 sx_debug_1;
704         u32 smx_dc_ctl0;
705         u32 cgts_sm_ctrl_reg;
706         u32 hdp_host_path_cntl;
707         u32 tmp;
708         u32 disabled_rb_mask;
709         int i, j;
710
711         switch (rdev->family) {
712         case CHIP_CAYMAN:
713                 rdev->config.cayman.max_shader_engines = 2;
714                 rdev->config.cayman.max_pipes_per_simd = 4;
715                 rdev->config.cayman.max_tile_pipes = 8;
716                 rdev->config.cayman.max_simds_per_se = 12;
717                 rdev->config.cayman.max_backends_per_se = 4;
718                 rdev->config.cayman.max_texture_channel_caches = 8;
719                 rdev->config.cayman.max_gprs = 256;
720                 rdev->config.cayman.max_threads = 256;
721                 rdev->config.cayman.max_gs_threads = 32;
722                 rdev->config.cayman.max_stack_entries = 512;
723                 rdev->config.cayman.sx_num_of_sets = 8;
724                 rdev->config.cayman.sx_max_export_size = 256;
725                 rdev->config.cayman.sx_max_export_pos_size = 64;
726                 rdev->config.cayman.sx_max_export_smx_size = 192;
727                 rdev->config.cayman.max_hw_contexts = 8;
728                 rdev->config.cayman.sq_num_cf_insts = 2;
729
730                 rdev->config.cayman.sc_prim_fifo_size = 0x100;
731                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
732                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
733                 gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
734                 break;
735         case CHIP_ARUBA:
736         default:
737                 rdev->config.cayman.max_shader_engines = 1;
738                 rdev->config.cayman.max_pipes_per_simd = 4;
739                 rdev->config.cayman.max_tile_pipes = 2;
740                 if ((rdev->pdev->device == 0x9900) ||
741                     (rdev->pdev->device == 0x9901) ||
742                     (rdev->pdev->device == 0x9905) ||
743                     (rdev->pdev->device == 0x9906) ||
744                     (rdev->pdev->device == 0x9907) ||
745                     (rdev->pdev->device == 0x9908) ||
746                     (rdev->pdev->device == 0x9909) ||
747                     (rdev->pdev->device == 0x990B) ||
748                     (rdev->pdev->device == 0x990C) ||
749                     (rdev->pdev->device == 0x990F) ||
750                     (rdev->pdev->device == 0x9910) ||
751                     (rdev->pdev->device == 0x9917) ||
752                     (rdev->pdev->device == 0x9999) ||
753                     (rdev->pdev->device == 0x999C)) {
754                         rdev->config.cayman.max_simds_per_se = 6;
755                         rdev->config.cayman.max_backends_per_se = 2;
756                         rdev->config.cayman.max_hw_contexts = 8;
757                         rdev->config.cayman.sx_max_export_size = 256;
758                         rdev->config.cayman.sx_max_export_pos_size = 64;
759                         rdev->config.cayman.sx_max_export_smx_size = 192;
760                 } else if ((rdev->pdev->device == 0x9903) ||
761                            (rdev->pdev->device == 0x9904) ||
762                            (rdev->pdev->device == 0x990A) ||
763                            (rdev->pdev->device == 0x990D) ||
764                            (rdev->pdev->device == 0x990E) ||
765                            (rdev->pdev->device == 0x9913) ||
766                            (rdev->pdev->device == 0x9918) ||
767                            (rdev->pdev->device == 0x999D)) {
768                         rdev->config.cayman.max_simds_per_se = 4;
769                         rdev->config.cayman.max_backends_per_se = 2;
770                         rdev->config.cayman.max_hw_contexts = 8;
771                         rdev->config.cayman.sx_max_export_size = 256;
772                         rdev->config.cayman.sx_max_export_pos_size = 64;
773                         rdev->config.cayman.sx_max_export_smx_size = 192;
774                 } else if ((rdev->pdev->device == 0x9919) ||
775                            (rdev->pdev->device == 0x9990) ||
776                            (rdev->pdev->device == 0x9991) ||
777                            (rdev->pdev->device == 0x9994) ||
778                            (rdev->pdev->device == 0x9995) ||
779                            (rdev->pdev->device == 0x9996) ||
780                            (rdev->pdev->device == 0x999A) ||
781                            (rdev->pdev->device == 0x99A0)) {
782                         rdev->config.cayman.max_simds_per_se = 3;
783                         rdev->config.cayman.max_backends_per_se = 1;
784                         rdev->config.cayman.max_hw_contexts = 4;
785                         rdev->config.cayman.sx_max_export_size = 128;
786                         rdev->config.cayman.sx_max_export_pos_size = 32;
787                         rdev->config.cayman.sx_max_export_smx_size = 96;
788                 } else {
789                         rdev->config.cayman.max_simds_per_se = 2;
790                         rdev->config.cayman.max_backends_per_se = 1;
791                         rdev->config.cayman.max_hw_contexts = 4;
792                         rdev->config.cayman.sx_max_export_size = 128;
793                         rdev->config.cayman.sx_max_export_pos_size = 32;
794                         rdev->config.cayman.sx_max_export_smx_size = 96;
795                 }
796                 rdev->config.cayman.max_texture_channel_caches = 2;
797                 rdev->config.cayman.max_gprs = 256;
798                 rdev->config.cayman.max_threads = 256;
799                 rdev->config.cayman.max_gs_threads = 32;
800                 rdev->config.cayman.max_stack_entries = 512;
801                 rdev->config.cayman.sx_num_of_sets = 8;
802                 rdev->config.cayman.sq_num_cf_insts = 2;
803
804                 rdev->config.cayman.sc_prim_fifo_size = 0x40;
805                 rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
806                 rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
807                 gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
808                 break;
809         }
810
811         /* Initialize HDP */
812         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
813                 WREG32((0x2c14 + j), 0x00000000);
814                 WREG32((0x2c18 + j), 0x00000000);
815                 WREG32((0x2c1c + j), 0x00000000);
816                 WREG32((0x2c20 + j), 0x00000000);
817                 WREG32((0x2c24 + j), 0x00000000);
818         }
819
820         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
821
822         evergreen_fix_pci_max_read_req_size(rdev);
823
824         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
825         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
826
827         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
828         rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
829         if (rdev->config.cayman.mem_row_size_in_kb > 4)
830                 rdev->config.cayman.mem_row_size_in_kb = 4;
831         /* XXX use MC settings? */
832         rdev->config.cayman.shader_engine_tile_size = 32;
833         rdev->config.cayman.num_gpus = 1;
834         rdev->config.cayman.multi_gpu_tile_size = 64;
835
836         tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
837         rdev->config.cayman.num_tile_pipes = (1 << tmp);
838         tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
839         rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
840         tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
841         rdev->config.cayman.num_shader_engines = tmp + 1;
842         tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
843         rdev->config.cayman.num_gpus = tmp + 1;
844         tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
845         rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
846         tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
847         rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
848
849
850         /* setup tiling info dword.  gb_addr_config is not adequate since it does
851          * not have bank info, so create a custom tiling dword.
852          * bits 3:0   num_pipes
853          * bits 7:4   num_banks
854          * bits 11:8  group_size
855          * bits 15:12 row_size
856          */
857         rdev->config.cayman.tile_config = 0;
858         switch (rdev->config.cayman.num_tile_pipes) {
859         case 1:
860         default:
861                 rdev->config.cayman.tile_config |= (0 << 0);
862                 break;
863         case 2:
864                 rdev->config.cayman.tile_config |= (1 << 0);
865                 break;
866         case 4:
867                 rdev->config.cayman.tile_config |= (2 << 0);
868                 break;
869         case 8:
870                 rdev->config.cayman.tile_config |= (3 << 0);
871                 break;
872         }
873
874         /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
875         if (rdev->flags & RADEON_IS_IGP)
876                 rdev->config.cayman.tile_config |= 1 << 4;
877         else {
878                 switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
879                 case 0: /* four banks */
880                         rdev->config.cayman.tile_config |= 0 << 4;
881                         break;
882                 case 1: /* eight banks */
883                         rdev->config.cayman.tile_config |= 1 << 4;
884                         break;
885                 case 2: /* sixteen banks */
886                 default:
887                         rdev->config.cayman.tile_config |= 2 << 4;
888                         break;
889                 }
890         }
891         rdev->config.cayman.tile_config |=
892                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
893         rdev->config.cayman.tile_config |=
894                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
895
896         tmp = 0;
897         for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
898                 u32 rb_disable_bitmap;
899
900                 WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
901                 WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
902                 rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
903                 tmp <<= 4;
904                 tmp |= rb_disable_bitmap;
905         }
906         /* enabled rb are just the one not disabled :) */
907         disabled_rb_mask = tmp;
908         tmp = 0;
909         for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
910                 tmp |= (1 << i);
911         /* if all the backends are disabled, fix it up here */
912         if ((disabled_rb_mask & tmp) == tmp) {
913                 for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
914                         disabled_rb_mask &= ~(1 << i);
915         }
916
917         WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
918         WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
919
920         WREG32(GB_ADDR_CONFIG, gb_addr_config);
921         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
922         if (ASIC_IS_DCE6(rdev))
923                 WREG32(DMIF_ADDR_CALC, gb_addr_config);
924         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
925         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
926         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
927         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
928         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
929         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
930
931         if ((rdev->config.cayman.max_backends_per_se == 1) &&
932             (rdev->flags & RADEON_IS_IGP)) {
933                 if ((disabled_rb_mask & 3) == 1) {
934                         /* RB0 disabled, RB1 enabled */
935                         tmp = 0x11111111;
936                 } else {
937                         /* RB1 disabled, RB0 enabled */
938                         tmp = 0x00000000;
939                 }
940         } else {
941                 tmp = gb_addr_config & NUM_PIPES_MASK;
942                 tmp = r6xx_remap_render_backend(rdev, tmp,
943                                                 rdev->config.cayman.max_backends_per_se *
944                                                 rdev->config.cayman.max_shader_engines,
945                                                 CAYMAN_MAX_BACKENDS, disabled_rb_mask);
946         }
947         WREG32(GB_BACKEND_MAP, tmp);
948
949         cgts_tcc_disable = 0xffff0000;
950         for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
951                 cgts_tcc_disable &= ~(1 << (16 + i));
952         WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
953         WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
954         WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
955         WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
956
957         /* reprogram the shader complex */
958         cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
959         for (i = 0; i < 16; i++)
960                 WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
961         WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
962
963         /* set HW defaults for 3D engine */
964         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
965
966         sx_debug_1 = RREG32(SX_DEBUG_1);
967         sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
968         WREG32(SX_DEBUG_1, sx_debug_1);
969
970         smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
971         smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
972         smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
973         WREG32(SMX_DC_CTL0, smx_dc_ctl0);
974
975         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
976
977         /* need to be explicitly zero-ed */
978         WREG32(VGT_OFFCHIP_LDS_BASE, 0);
979         WREG32(SQ_LSTMP_RING_BASE, 0);
980         WREG32(SQ_HSTMP_RING_BASE, 0);
981         WREG32(SQ_ESTMP_RING_BASE, 0);
982         WREG32(SQ_GSTMP_RING_BASE, 0);
983         WREG32(SQ_VSTMP_RING_BASE, 0);
984         WREG32(SQ_PSTMP_RING_BASE, 0);
985
986         WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
987
988         WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
989                                         POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
990                                         SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
991
992         WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
993                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
994                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
995
996
997         WREG32(VGT_NUM_INSTANCES, 1);
998
999         WREG32(CP_PERFMON_CNTL, 0);
1000
1001         WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1002                                   FETCH_FIFO_HIWATER(0x4) |
1003                                   DONE_FIFO_HIWATER(0xe0) |
1004                                   ALU_UPDATE_FIFO_HIWATER(0x8)));
1005
1006         WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1007         WREG32(SQ_CONFIG, (VC_ENABLE |
1008                            EXPORT_SRC_C |
1009                            GFX_PRIO(0) |
1010                            CS1_PRIO(0) |
1011                            CS2_PRIO(1)));
1012         WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1013
1014         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1015                                           FORCE_EOV_MAX_REZ_CNT(255)));
1016
1017         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1018                AUTO_INVLD_EN(ES_AND_GS_AUTO));
1019
1020         WREG32(VGT_GS_VERTEX_REUSE, 16);
1021         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1022
1023         WREG32(CB_PERF_CTR0_SEL_0, 0);
1024         WREG32(CB_PERF_CTR0_SEL_1, 0);
1025         WREG32(CB_PERF_CTR1_SEL_0, 0);
1026         WREG32(CB_PERF_CTR1_SEL_1, 0);
1027         WREG32(CB_PERF_CTR2_SEL_0, 0);
1028         WREG32(CB_PERF_CTR2_SEL_1, 0);
1029         WREG32(CB_PERF_CTR3_SEL_0, 0);
1030         WREG32(CB_PERF_CTR3_SEL_1, 0);
1031
1032         tmp = RREG32(HDP_MISC_CNTL);
1033         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1034         WREG32(HDP_MISC_CNTL, tmp);
1035
1036         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1037         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1038
1039         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1040
1041         udelay(50);
1042 }
1043
1044 /*
1045  * GART
1046  */
/**
 * cayman_pcie_gart_tlb_flush - flush the VM TLBs
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache so pending CPU writes reach memory, then
 * request invalidation of the TLBs for all eight VM contexts so
 * stale GART translations are dropped.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

        /* bits 0-7 are the VM contexts0-7 */
        WREG32(VM_INVALIDATE_REQUEST, 1);
}
1055
/**
 * cayman_pcie_gart_enable - set up and enable the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, points VM context 0 (the driver's system context) at the
 * GART table covering the GTT range, enables VM contexts 1-7 for
 * per-process virtual memory with fault reporting, and finally
 * flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
        int i, r;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        radeon_gart_restore(rdev);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) |
               ENABLE_L1_TLB |
               ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        /* setup context0 - flat page table (depth 0) over the GTT range,
         * faults redirected to the dummy page */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

        /* NOTE(review): raw register offsets with no symbolic names -
         * presumably VM related; verify against AMD register docs */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* empty context1-7 */
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
         */
        for (i = 1; i < 8; i++) {
                /* context0-7 registers are contiguous, 4 bytes apart */
                WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
                WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
                WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                        rdev->gart.table_addr >> 12);
        }

        /* enable context1-7 */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        cayman_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
1136
/**
 * cayman_pcie_gart_disable - disable the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the TLB and L2 controls
 * without the enable bits, and unpins the GART table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        radeon_gart_table_vram_unpin(rdev);
}
1156
/**
 * cayman_pcie_gart_fini - tear down the PCIE GART
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the table BO and the GART
 * bookkeeping structures (teardown order matters).
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
        cayman_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
1163
1164 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1165                               int ring, u32 cp_int_cntl)
1166 {
1167         u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1168
1169         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1170         WREG32(CP_INT_CNTL, cp_int_cntl);
1171 }
1172
1173 /*
1174  * CP.
1175  */
/**
 * cayman_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the TC/SH read caches over GART for this ring's VMID,
 * then emits an EVENT_WRITE_EOP packet that flushes the write
 * caches, writes the fence sequence number to the fence driver's
 * GPU address and raises an interrupt.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
                            struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, 0); /* vmid 0 in CP_COHER_CNTL2 */
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF); /* sync full address range */
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
        radeon_ring_write(ring, addr & 0xffffffff);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
1199
/**
 * cayman_ring_ib_execute - schedule an IB on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Switches the CP to DX10/11 mode, optionally records the predicted
 * read pointer in the ring's scratch register, emits the
 * INDIRECT_BUFFER packet for the IB, and finally flushes the read
 * caches over GART for the IB's VMID.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        /* set to DX10/11 mode */
        radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(ring, 1);

        if (ring->rptr_save_reg) {
                /* 3 dwords for this write, 4 for the IB packet,
                 * 8 for the cache flush below */
                uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, ((ring->rptr_save_reg -
                                          PACKET3_SET_CONFIG_REG_START) >> 2));
                radeon_ring_write(ring, next_rptr);
        }

        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
        /* vmid goes in bits 31:24 of the length dword */
        radeon_ring_write(ring, ib->length_dw |
                          (ib->vm ? (ib->vm->id << 24) : 0));

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF); /* sync full address range */
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
}
1236
1237 void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
1238                                struct radeon_ring *ring,
1239                                struct radeon_semaphore *semaphore,
1240                                bool emit_wait)
1241 {
1242         uint64_t addr = semaphore->gpu_addr;
1243
1244         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
1245         radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
1246
1247         radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
1248         radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
1249
1250         radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
1251         radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
1252 }
1253
1254 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1255 {
1256         if (enable)
1257                 WREG32(CP_ME_CNTL, 0);
1258         else {
1259                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1260                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1261                 WREG32(SCRATCH_UMSK, 0);
1262                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1263         }
1264 }
1265
1266 static int cayman_cp_load_microcode(struct radeon_device *rdev)
1267 {
1268         const __be32 *fw_data;
1269         int i;
1270
1271         if (!rdev->me_fw || !rdev->pfp_fw)
1272                 return -EINVAL;
1273
1274         cayman_cp_enable(rdev, false);
1275
1276         fw_data = (const __be32 *)rdev->pfp_fw->data;
1277         WREG32(CP_PFP_UCODE_ADDR, 0);
1278         for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
1279                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1280         WREG32(CP_PFP_UCODE_ADDR, 0);
1281
1282         fw_data = (const __be32 *)rdev->me_fw->data;
1283         WREG32(CP_ME_RAM_WADDR, 0);
1284         for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
1285                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1286
1287         WREG32(CP_PFP_UCODE_ADDR, 0);
1288         WREG32(CP_ME_RAM_WADDR, 0);
1289         WREG32(CP_ME_RAM_RADDR, 0);
1290         return 0;
1291 }
1292
/**
 * cayman_cp_start - initialize the gfx ring state
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE, starts the CP, then emits the golden
 * clear-state context (cayman_default_state) plus a few default
 * register values on the gfx ring so the 3D engine starts from a
 * known state.  Only CP ring 0 is initialized here.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* 7 dwords: ME_INITIALIZE header + 6 payload dwords below */
        r = radeon_ring_lock(rdev, ring, 7);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }
        radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
        radeon_ring_write(ring, 0x1);
        radeon_ring_write(ring, 0x0);
        radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
        radeon_ring_unlock_commit(rdev, ring);

        cayman_cp_enable(rdev, true);

        /* golden state plus 19 dwords of packets emitted below */
        r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        for (i = 0; i < cayman_default_size; i++)
                radeon_ring_write(ring, cayman_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        /* SQ_VTX_BASE_VTX_LOC */
        radeon_ring_write(ring, 0xc0026f00);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);

        /* Clear consts */
        radeon_ring_write(ring, 0xc0036f00);
        radeon_ring_write(ring, 0x00000bc4);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);

        /* SET_CONTEXT_REG, two regs starting at context offset 0x316 */
        radeon_ring_write(ring, 0xc0026900);
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* next context reg (0x317), presumably VGT_OUT_DEALLOC_CNTL - verify */

        radeon_ring_unlock_commit(rdev, ring);

        /* XXX init other rings */

        return 0;
}
1358
1359 static void cayman_cp_fini(struct radeon_device *rdev)
1360 {
1361         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1362         cayman_cp_enable(rdev, false);
1363         radeon_ring_fini(rdev, ring);
1364         radeon_scratch_free(rdev, ring->rptr_save_reg);
1365 }
1366
/**
 * cayman_cp_resume - soft-reset and restart the three CP rings
 * @rdev: radeon_device pointer
 *
 * Soft-resets the CP and related gfx blocks, programs the ring
 * buffer size/base/rptr-writeback registers for CP rings 0-2,
 * zeroes the read/write pointers, then starts the rings via
 * cayman_cp_start() and ring-tests CP0.  Only CP0 is marked ready.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
        static const int ridx[] = {
                RADEON_RING_TYPE_GFX_INDEX,
                CAYMAN_RING_TYPE_CP1_INDEX,
                CAYMAN_RING_TYPE_CP2_INDEX
        };
        static const unsigned cp_rb_cntl[] = {
                CP_RB0_CNTL,
                CP_RB1_CNTL,
                CP_RB2_CNTL,
        };
        static const unsigned cp_rb_rptr_addr[] = {
                CP_RB0_RPTR_ADDR,
                CP_RB1_RPTR_ADDR,
                CP_RB2_RPTR_ADDR
        };
        static const unsigned cp_rb_rptr_addr_hi[] = {
                CP_RB0_RPTR_ADDR_HI,
                CP_RB1_RPTR_ADDR_HI,
                CP_RB2_RPTR_ADDR_HI
        };
        static const unsigned cp_rb_base[] = {
                CP_RB0_BASE,
                CP_RB1_BASE,
                CP_RB2_BASE
        };
        struct radeon_ring *ring;
        int i, r;

        /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
        WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
                                 SOFT_RESET_PA |
                                 SOFT_RESET_SH |
                                 SOFT_RESET_VGT |
                                 SOFT_RESET_SPI |
                                 SOFT_RESET_SX));
        RREG32(GRBM_SOFT_RESET); /* read back to post the write */
        mdelay(15);
        WREG32(GRBM_SOFT_RESET, 0);
        RREG32(GRBM_SOFT_RESET);

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        WREG32(CP_DEBUG, (1 << 27));

        /* set the wb address whether it's enabled or not */
        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
        WREG32(SCRATCH_UMSK, 0xff);

        for (i = 0; i < 3; ++i) {
                uint32_t rb_cntl;
                uint64_t addr;

                /* Set ring buffer size */
                ring = &rdev->ring[ridx[i]];
                rb_cntl = drm_order(ring->ring_size / 8);
                rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
                rb_cntl |= BUF_SWAP_32BIT;
#endif
                WREG32(cp_rb_cntl[i], rb_cntl);

                /* set the wb address whether it's enabled or not */
                addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
                WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
                WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
        }

        /* set the rb base addr, this causes an internal reset of ALL rings */
        for (i = 0; i < 3; ++i) {
                ring = &rdev->ring[ridx[i]];
                WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
        }

        for (i = 0; i < 3; ++i) {
                /* Initialize the ring buffer's read and write pointers */
                ring = &rdev->ring[ridx[i]];
                WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

                ring->rptr = ring->wptr = 0;
                WREG32(ring->rptr_reg, ring->rptr);
                WREG32(ring->wptr_reg, ring->wptr);

                mdelay(1);
                WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
        }

        /* start the rings */
        cayman_cp_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
        /* this only test cp0 */
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
                return r;
        }

        return 0;
}
1475
1476 /*
1477  * DMA
1478  * Starting with R600, the GPU has an asynchronous
1479  * DMA engine.  The programming model is very similar
1480  * to the 3D engine (ring buffer, IBs, etc.), but the
1481  * DMA controller has it's own packet format that is
1482  * different form the PM4 format used by the 3D engine.
1483  * It supports copying data, writing embedded data,
1484  * solid fills, and a number of other things.  It also
1485  * has support for tiling/detiling of buffers.
1486  * Cayman and newer support two asynchronous DMA engines.
1487  */
1488 /**
1489  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1490  *
1491  * @rdev: radeon_device pointer
1492  * @ib: IB object to schedule
1493  *
1494  * Schedule an IB in the DMA ring (cayman-SI).
1495  */
1496 void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1497                                 struct radeon_ib *ib)
1498 {
1499         struct radeon_ring *ring = &rdev->ring[ib->ring];
1500
1501         if (rdev->wb.enabled) {
1502                 u32 next_rptr = ring->wptr + 4;
1503                 while ((next_rptr & 7) != 5)
1504                         next_rptr++;
1505                 next_rptr += 3;
1506                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1507                 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1508                 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1509                 radeon_ring_write(ring, next_rptr);
1510         }
1511
1512         /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1513          * Pad as necessary with NOPs.
1514          */
1515         while ((ring->wptr & 7) != 5)
1516                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1517         radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1518         radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1519         radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1520
1521 }
1522
1523 /**
1524  * cayman_dma_stop - stop the async dma engines
1525  *
1526  * @rdev: radeon_device pointer
1527  *
1528  * Stop the async dma engines (cayman-SI).
1529  */
1530 void cayman_dma_stop(struct radeon_device *rdev)
1531 {
1532         u32 rb_cntl;
1533
1534         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1535
1536         /* dma0 */
1537         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1538         rb_cntl &= ~DMA_RB_ENABLE;
1539         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1540
1541         /* dma1 */
1542         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1543         rb_cntl &= ~DMA_RB_ENABLE;
1544         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1545
1546         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1547         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1548 }
1549
1550 /**
1551  * cayman_dma_resume - setup and start the async dma engines
1552  *
1553  * @rdev: radeon_device pointer
1554  *
1555  * Set up the DMA ring buffers and enable them. (cayman-SI).
1556  * Returns 0 for success, error for failure.
1557  */
1558 int cayman_dma_resume(struct radeon_device *rdev)
1559 {
1560         struct radeon_ring *ring;
1561         u32 rb_cntl, dma_cntl, ib_cntl;
1562         u32 rb_bufsz;
1563         u32 reg_offset, wb_offset;
1564         int i, r;
1565
1566         /* Reset dma */
1567         WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
1568         RREG32(SRBM_SOFT_RESET);
1569         udelay(50);
1570         WREG32(SRBM_SOFT_RESET, 0);
1571
1572         for (i = 0; i < 2; i++) {
1573                 if (i == 0) {
1574                         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1575                         reg_offset = DMA0_REGISTER_OFFSET;
1576                         wb_offset = R600_WB_DMA_RPTR_OFFSET;
1577                 } else {
1578                         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1579                         reg_offset = DMA1_REGISTER_OFFSET;
1580                         wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
1581                 }
1582
1583                 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
1584                 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
1585
1586                 /* Set ring buffer size in dwords */
1587                 rb_bufsz = drm_order(ring->ring_size / 4);
1588                 rb_cntl = rb_bufsz << 1;
1589 #ifdef __BIG_ENDIAN
1590                 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1591 #endif
1592                 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
1593
1594                 /* Initialize the ring buffer's read and write pointers */
1595                 WREG32(DMA_RB_RPTR + reg_offset, 0);
1596                 WREG32(DMA_RB_WPTR + reg_offset, 0);
1597
1598                 /* set the wb address whether it's enabled or not */
1599                 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
1600                        upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
1601                 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
1602                        ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
1603
1604                 if (rdev->wb.enabled)
1605                         rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1606
1607                 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
1608
1609                 /* enable DMA IBs */
1610                 ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
1611 #ifdef __BIG_ENDIAN
1612                 ib_cntl |= DMA_IB_SWAP_ENABLE;
1613 #endif
1614                 WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
1615
1616                 dma_cntl = RREG32(DMA_CNTL + reg_offset);
1617                 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1618                 WREG32(DMA_CNTL + reg_offset, dma_cntl);
1619
1620                 ring->wptr = 0;
1621                 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
1622
1623                 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
1624
1625                 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
1626
1627                 ring->ready = true;
1628
1629                 r = radeon_ring_test(rdev, ring->idx, ring);
1630                 if (r) {
1631                         ring->ready = false;
1632                         return r;
1633                 }
1634         }
1635
1636         radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1637
1638         return 0;
1639 }
1640
1641 /**
1642  * cayman_dma_fini - tear down the async dma engines
1643  *
1644  * @rdev: radeon_device pointer
1645  *
1646  * Stop the async dma engines and free the rings (cayman-SI).
1647  */
1648 void cayman_dma_fini(struct radeon_device *rdev)
1649 {
1650         cayman_dma_stop(rdev);
1651         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1652         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1653 }
1654
/**
 * cayman_gpu_check_soft_reset - determine which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Read the GRBM, DMA, SRBM and VM_L2 status registers and build a
 * RADEON_RESET_* bitmask of the blocks that report busy/pending.
 * An MC busy indication is dropped from the mask before returning,
 * since the MC is most likely just busy rather than hung.
 *
 * Returns the mask of blocks needing a soft reset (0 if none).
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1732
/**
 * cayman_gpu_soft_reset - soft reset the blocks named in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Dump some VM fault state, halt the CP and any DMA engines being
 * reset, stop the MC, then pulse the GRBM and SRBM soft-reset
 * registers for the requested blocks and resume the MC.  No-op when
 * reset_mask is 0.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the memory controller before pulsing the resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* IGP parts have no dedicated MC to reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the reset bits: set, read back to post, wait, clear */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1864
1865 int cayman_asic_reset(struct radeon_device *rdev)
1866 {
1867         u32 reset_mask;
1868
1869         reset_mask = cayman_gpu_check_soft_reset(rdev);
1870
1871         if (reset_mask)
1872                 r600_set_bios_scratch_engine_hung(rdev, true);
1873
1874         cayman_gpu_soft_reset(rdev, reset_mask);
1875
1876         reset_mask = cayman_gpu_check_soft_reset(rdev);
1877
1878         if (!reset_mask)
1879                 r600_set_bios_scratch_engine_hung(rdev, false);
1880
1881         return 0;
1882 }
1883
1884 /**
1885  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1886  *
1887  * @rdev: radeon_device pointer
1888  * @ring: radeon_ring structure holding ring information
1889  *
1890  * Check if the GFX engine is locked up.
1891  * Returns true if the engine appears to be locked up, false if not.
1892  */
1893 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1894 {
1895         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1896
1897         if (!(reset_mask & (RADEON_RESET_GFX |
1898                             RADEON_RESET_COMPUTE |
1899                             RADEON_RESET_CP))) {
1900                 radeon_ring_lockup_update(ring);
1901                 return false;
1902         }
1903         /* force CP activities */
1904         radeon_ring_force_activity(rdev, ring);
1905         return radeon_ring_test_lockup(rdev, ring);
1906 }
1907
1908 /**
1909  * cayman_dma_is_lockup - Check if the DMA engine is locked up
1910  *
1911  * @rdev: radeon_device pointer
1912  * @ring: radeon_ring structure holding ring information
1913  *
1914  * Check if the async DMA engine is locked up.
1915  * Returns true if the engine appears to be locked up, false if not.
1916  */
1917 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1918 {
1919         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1920         u32 mask;
1921
1922         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1923                 mask = RADEON_RESET_DMA;
1924         else
1925                 mask = RADEON_RESET_DMA1;
1926
1927         if (!(reset_mask & mask)) {
1928                 radeon_ring_lockup_update(ring);
1929                 return false;
1930         }
1931         /* force ring activities */
1932         radeon_ring_force_activity(rdev, ring);
1933         return radeon_ring_test_lockup(rdev, ring);
1934 }
1935
/**
 * cayman_startup - bring up all engines of the ASIC
 *
 * @rdev: radeon_device pointer
 *
 * Program the MC, load microcode, enable the GART, init GPU state,
 * set up writeback/fences/interrupts, then init and resume the CP,
 * DMA and (if present) UVD rings, and finally the IB pool and VM
 * manager.  Called from both cayman_init() and cayman_resume().
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);

	evergreen_mc_program(rdev);

	/* IGP (TN) parts have no MC microcode; discrete parts need it */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* blitter failure is non-fatal; copies fall back to memcpy */
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		r = si_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD is optional; disable its ring on any failure */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above or never set up */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2118
2119 int cayman_resume(struct radeon_device *rdev)
2120 {
2121         int r;
2122
2123         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2124          * posting will perform necessary task to bring back GPU into good
2125          * shape.
2126          */
2127         /* post card */
2128         atom_asic_init(rdev->mode_info.atom_context);
2129
2130         /* init golden registers */
2131         ni_init_golden_registers(rdev);
2132
2133         rdev->accel_working = true;
2134         r = cayman_startup(rdev);
2135         if (r) {
2136                 DRM_ERROR("cayman startup failed on resume\n");
2137                 rdev->accel_working = false;
2138                 return r;
2139         }
2140         return r;
2141 }
2142
/**
 * cayman_suspend - quiesce the ASIC before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Shut down, in order: audio, VM manager, CP, DMA, UVD, interrupts,
 * writeback and the GART.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART last: nothing above should touch GPU memory afterwards */
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2156
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cayman_init - one-time ASIC init at driver load
 *
 * @rdev: radeon_device pointer
 *
 * Read and validate the (ATOM) BIOS, post the card if needed, set up
 * clocks, fences, MC, memory manager, all rings and the GART, then
 * run cayman_startup().  A startup failure disables acceleration but
 * is not fatal here; a missing MC ucode on non-IGP parts is.
 * Returns 0 on success, negative error code on failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* pre-size the rings; the buffers are allocated in cayman_startup() */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional: only size its ring when uvd init succeeded */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* tear everything back down and continue without accel */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2268
/**
 * cayman_fini - final teardown at driver unload
 *
 * @rdev: radeon_device pointer
 *
 * Free everything cayman_init()/cayman_startup() set up, roughly in
 * reverse order of initialization, ending with the BIOS copy.
 */
void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	if (rdev->flags & RADEON_IS_IGP)
		si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2292
2293 /*
2294  * vm
2295  */
2296 int cayman_vm_init(struct radeon_device *rdev)
2297 {
2298         /* number of VMs */
2299         rdev->vm_manager.nvm = 8;
2300         /* base offset of vram pages */
2301         if (rdev->flags & RADEON_IS_IGP) {
2302                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2303                 tmp <<= 22;
2304                 rdev->vm_manager.vram_base_offset = tmp;
2305         } else
2306                 rdev->vm_manager.vram_base_offset = 0;
2307         return 0;
2308 }
2309
/**
 * cayman_vm_fini - tear down the VM manager (cayman/TN)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() allocates no per-ASIC state,
 * so there is nothing to release here.  Kept as a stub to satisfy the
 * asic vm callback interface.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2313
2314 #define R600_ENTRY_VALID   (1 << 0)
2315 #define R600_PTE_SYSTEM    (1 << 1)
2316 #define R600_PTE_SNOOPED   (1 << 2)
2317 #define R600_PTE_READABLE  (1 << 5)
2318 #define R600_PTE_WRITEABLE (1 << 6)
2319
2320 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2321 {
2322         uint32_t r600_flags = 0;
2323         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2324         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2325         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2326         if (flags & RADEON_VM_PAGE_SYSTEM) {
2327                 r600_flags |= R600_PTE_SYSTEM;
2328                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2329         }
2330         return r600_flags;
2331 }
2332
2333 /**
2334  * cayman_vm_set_page - update the page tables using the CP
2335  *
2336  * @rdev: radeon_device pointer
2337  * @ib: indirect buffer to fill with commands
2338  * @pe: addr of the page entry
2339  * @addr: dst addr to write into pe
2340  * @count: number of page entries to update
2341  * @incr: increase next addr by incr bytes
2342  * @flags: access flags
2343  *
2344  * Update the page tables using the CP (cayman/TN).
2345  */
2346 void cayman_vm_set_page(struct radeon_device *rdev,
2347                         struct radeon_ib *ib,
2348                         uint64_t pe,
2349                         uint64_t addr, unsigned count,
2350                         uint32_t incr, uint32_t flags)
2351 {
2352         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2353         uint64_t value;
2354         unsigned ndw;
2355
2356         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2357                 while (count) {
2358                         ndw = 1 + count * 2;
2359                         if (ndw > 0x3FFF)
2360                                 ndw = 0x3FFF;
2361
2362                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2363                         ib->ptr[ib->length_dw++] = pe;
2364                         ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2365                         for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2366                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
2367                                         value = radeon_vm_map_gart(rdev, addr);
2368                                         value &= 0xFFFFFFFFFFFFF000ULL;
2369                                 } else if (flags & RADEON_VM_PAGE_VALID) {
2370                                         value = addr;
2371                                 } else {
2372                                         value = 0;
2373                                 }
2374                                 addr += incr;
2375                                 value |= r600_flags;
2376                                 ib->ptr[ib->length_dw++] = value;
2377                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2378                         }
2379                 }
2380         } else {
2381                 if ((flags & RADEON_VM_PAGE_SYSTEM) ||
2382                     (count == 1)) {
2383                         while (count) {
2384                                 ndw = count * 2;
2385                                 if (ndw > 0xFFFFE)
2386                                         ndw = 0xFFFFE;
2387
2388                                 /* for non-physically contiguous pages (system) */
2389                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2390                                 ib->ptr[ib->length_dw++] = pe;
2391                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2392                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2393                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
2394                                                 value = radeon_vm_map_gart(rdev, addr);
2395                                                 value &= 0xFFFFFFFFFFFFF000ULL;
2396                                         } else if (flags & RADEON_VM_PAGE_VALID) {
2397                                                 value = addr;
2398                                         } else {
2399                                                 value = 0;
2400                                         }
2401                                         addr += incr;
2402                                         value |= r600_flags;
2403                                         ib->ptr[ib->length_dw++] = value;
2404                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
2405                                 }
2406                         }
2407                         while (ib->length_dw & 0x7)
2408                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2409                 } else {
2410                         while (count) {
2411                                 ndw = count * 2;
2412                                 if (ndw > 0xFFFFE)
2413                                         ndw = 0xFFFFE;
2414
2415                                 if (flags & RADEON_VM_PAGE_VALID)
2416                                         value = addr;
2417                                 else
2418                                         value = 0;
2419                                 /* for physically contiguous pages (vram) */
2420                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2421                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
2422                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2423                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2424                                 ib->ptr[ib->length_dw++] = 0;
2425                                 ib->ptr[ib->length_dw++] = value; /* value */
2426                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2427                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
2428                                 ib->ptr[ib->length_dw++] = 0;
2429                                 pe += ndw * 4;
2430                                 addr += (ndw / 2) * incr;
2431                                 count -= ndw / 2;
2432                         }
2433                 }
2434                 while (ib->length_dw & 0x7)
2435                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2436         }
2437 }
2438
2439 /**
2440  * cayman_vm_flush - vm flush using the CP
2441  *
2442  * @rdev: radeon_device pointer
2443  *
2444  * Update the page table base and flush the VM TLB
2445  * using the CP (cayman-si).
2446  */
2447 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2448 {
2449         struct radeon_ring *ring = &rdev->ring[ridx];
2450
2451         if (vm == NULL)
2452                 return;
2453
2454         radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2455         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2456
2457         /* flush hdp cache */
2458         radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2459         radeon_ring_write(ring, 0x1);
2460
2461         /* bits 0-7 are the VM contexts0-7 */
2462         radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2463         radeon_ring_write(ring, 1 << vm->id);
2464
2465         /* sync PFP to ME, otherwise we might get invalid PFP reads */
2466         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2467         radeon_ring_write(ring, 0x0);
2468 }
2469
/**
 * cayman_dma_vm_flush - vm flush using the async DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the (DMA) ring to emit the flush on
 * @vm: VM to flush; NULL means nothing to do
 *
 * Same sequence as cayman_vm_flush() but emitted as DMA SRBM_WRITE
 * packets, since the DMA ring cannot use CP register-write packets.
 * Register offsets are given in dwords (>> 2); the (0xf << 16) field
 * is presumably the byte-enable mask for the write — TODO confirm
 * against the sDMA packet format docs.
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* update this VM context's page table base (registers are 4 bytes apart) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2491