/*
 * drivers/gpu/drm/radeon/si.c
 * (tree: firefly-linux-kernel-4.4.55, via linux-linaro-lsk merge of
 *  lsk/v3.10/topic/gator)
 */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
69 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
70 extern void r600_ih_ring_fini(struct radeon_device *rdev);
71 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
75 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
76 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
77
78 static const u32 tahiti_golden_rlc_registers[] =
79 {
80         0xc424, 0xffffffff, 0x00601005,
81         0xc47c, 0xffffffff, 0x10104040,
82         0xc488, 0xffffffff, 0x0100000a,
83         0xc314, 0xffffffff, 0x00000800,
84         0xc30c, 0xffffffff, 0x800000f4,
85         0xf4a8, 0xffffffff, 0x00000000
86 };
87
88 static const u32 tahiti_golden_registers[] =
89 {
90         0x9a10, 0x00010000, 0x00018208,
91         0x9830, 0xffffffff, 0x00000000,
92         0x9834, 0xf00fffff, 0x00000400,
93         0x9838, 0x0002021c, 0x00020200,
94         0xc78, 0x00000080, 0x00000000,
95         0xd030, 0x000300c0, 0x00800040,
96         0xd830, 0x000300c0, 0x00800040,
97         0x5bb0, 0x000000f0, 0x00000070,
98         0x5bc0, 0x00200000, 0x50100000,
99         0x7030, 0x31000311, 0x00000011,
100         0x277c, 0x00000003, 0x000007ff,
101         0x240c, 0x000007ff, 0x00000000,
102         0x8a14, 0xf000001f, 0x00000007,
103         0x8b24, 0xffffffff, 0x00ffffff,
104         0x8b10, 0x0000ff0f, 0x00000000,
105         0x28a4c, 0x07ffffff, 0x4e000000,
106         0x28350, 0x3f3f3fff, 0x2a00126a,
107         0x30, 0x000000ff, 0x0040,
108         0x34, 0x00000040, 0x00004040,
109         0x9100, 0x07ffffff, 0x03000000,
110         0x8e88, 0x01ff1f3f, 0x00000000,
111         0x8e84, 0x01ff1f3f, 0x00000000,
112         0x9060, 0x0000007f, 0x00000020,
113         0x9508, 0x00010000, 0x00010000,
114         0xac14, 0x00000200, 0x000002fb,
115         0xac10, 0xffffffff, 0x0000543b,
116         0xac0c, 0xffffffff, 0xa9210876,
117         0x88d0, 0xffffffff, 0x000fff40,
118         0x88d4, 0x0000001f, 0x00000010,
119         0x1410, 0x20000000, 0x20fffed8,
120         0x15c0, 0x000c0fc0, 0x000c0400
121 };
122
123 static const u32 tahiti_golden_registers2[] =
124 {
125         0xc64, 0x00000001, 0x00000001
126 };
127
128 static const u32 pitcairn_golden_rlc_registers[] =
129 {
130         0xc424, 0xffffffff, 0x00601004,
131         0xc47c, 0xffffffff, 0x10102020,
132         0xc488, 0xffffffff, 0x01000020,
133         0xc314, 0xffffffff, 0x00000800,
134         0xc30c, 0xffffffff, 0x800000a4
135 };
136
137 static const u32 pitcairn_golden_registers[] =
138 {
139         0x9a10, 0x00010000, 0x00018208,
140         0x9830, 0xffffffff, 0x00000000,
141         0x9834, 0xf00fffff, 0x00000400,
142         0x9838, 0x0002021c, 0x00020200,
143         0xc78, 0x00000080, 0x00000000,
144         0xd030, 0x000300c0, 0x00800040,
145         0xd830, 0x000300c0, 0x00800040,
146         0x5bb0, 0x000000f0, 0x00000070,
147         0x5bc0, 0x00200000, 0x50100000,
148         0x7030, 0x31000311, 0x00000011,
149         0x2ae4, 0x00073ffe, 0x000022a2,
150         0x240c, 0x000007ff, 0x00000000,
151         0x8a14, 0xf000001f, 0x00000007,
152         0x8b24, 0xffffffff, 0x00ffffff,
153         0x8b10, 0x0000ff0f, 0x00000000,
154         0x28a4c, 0x07ffffff, 0x4e000000,
155         0x28350, 0x3f3f3fff, 0x2a00126a,
156         0x30, 0x000000ff, 0x0040,
157         0x34, 0x00000040, 0x00004040,
158         0x9100, 0x07ffffff, 0x03000000,
159         0x9060, 0x0000007f, 0x00000020,
160         0x9508, 0x00010000, 0x00010000,
161         0xac14, 0x000003ff, 0x000000f7,
162         0xac10, 0xffffffff, 0x00000000,
163         0xac0c, 0xffffffff, 0x32761054,
164         0x88d4, 0x0000001f, 0x00000010,
165         0x15c0, 0x000c0fc0, 0x000c0400
166 };
167
168 static const u32 verde_golden_rlc_registers[] =
169 {
170         0xc424, 0xffffffff, 0x033f1005,
171         0xc47c, 0xffffffff, 0x10808020,
172         0xc488, 0xffffffff, 0x00800008,
173         0xc314, 0xffffffff, 0x00001000,
174         0xc30c, 0xffffffff, 0x80010014
175 };
176
177 static const u32 verde_golden_registers[] =
178 {
179         0x9a10, 0x00010000, 0x00018208,
180         0x9830, 0xffffffff, 0x00000000,
181         0x9834, 0xf00fffff, 0x00000400,
182         0x9838, 0x0002021c, 0x00020200,
183         0xc78, 0x00000080, 0x00000000,
184         0xd030, 0x000300c0, 0x00800040,
185         0xd030, 0x000300c0, 0x00800040,
186         0xd830, 0x000300c0, 0x00800040,
187         0xd830, 0x000300c0, 0x00800040,
188         0x5bb0, 0x000000f0, 0x00000070,
189         0x5bc0, 0x00200000, 0x50100000,
190         0x7030, 0x31000311, 0x00000011,
191         0x2ae4, 0x00073ffe, 0x000022a2,
192         0x2ae4, 0x00073ffe, 0x000022a2,
193         0x2ae4, 0x00073ffe, 0x000022a2,
194         0x240c, 0x000007ff, 0x00000000,
195         0x240c, 0x000007ff, 0x00000000,
196         0x240c, 0x000007ff, 0x00000000,
197         0x8a14, 0xf000001f, 0x00000007,
198         0x8a14, 0xf000001f, 0x00000007,
199         0x8a14, 0xf000001f, 0x00000007,
200         0x8b24, 0xffffffff, 0x00ffffff,
201         0x8b10, 0x0000ff0f, 0x00000000,
202         0x28a4c, 0x07ffffff, 0x4e000000,
203         0x28350, 0x3f3f3fff, 0x0000124a,
204         0x28350, 0x3f3f3fff, 0x0000124a,
205         0x28350, 0x3f3f3fff, 0x0000124a,
206         0x30, 0x000000ff, 0x0040,
207         0x34, 0x00000040, 0x00004040,
208         0x9100, 0x07ffffff, 0x03000000,
209         0x9100, 0x07ffffff, 0x03000000,
210         0x8e88, 0x01ff1f3f, 0x00000000,
211         0x8e88, 0x01ff1f3f, 0x00000000,
212         0x8e88, 0x01ff1f3f, 0x00000000,
213         0x8e84, 0x01ff1f3f, 0x00000000,
214         0x8e84, 0x01ff1f3f, 0x00000000,
215         0x8e84, 0x01ff1f3f, 0x00000000,
216         0x9060, 0x0000007f, 0x00000020,
217         0x9508, 0x00010000, 0x00010000,
218         0xac14, 0x000003ff, 0x00000003,
219         0xac14, 0x000003ff, 0x00000003,
220         0xac14, 0x000003ff, 0x00000003,
221         0xac10, 0xffffffff, 0x00000000,
222         0xac10, 0xffffffff, 0x00000000,
223         0xac10, 0xffffffff, 0x00000000,
224         0xac0c, 0xffffffff, 0x00001032,
225         0xac0c, 0xffffffff, 0x00001032,
226         0xac0c, 0xffffffff, 0x00001032,
227         0x88d4, 0x0000001f, 0x00000010,
228         0x88d4, 0x0000001f, 0x00000010,
229         0x88d4, 0x0000001f, 0x00000010,
230         0x15c0, 0x000c0fc0, 0x000c0400
231 };
232
233 static const u32 oland_golden_rlc_registers[] =
234 {
235         0xc424, 0xffffffff, 0x00601005,
236         0xc47c, 0xffffffff, 0x10104040,
237         0xc488, 0xffffffff, 0x0100000a,
238         0xc314, 0xffffffff, 0x00000800,
239         0xc30c, 0xffffffff, 0x800000f4
240 };
241
242 static const u32 oland_golden_registers[] =
243 {
244         0x9a10, 0x00010000, 0x00018208,
245         0x9830, 0xffffffff, 0x00000000,
246         0x9834, 0xf00fffff, 0x00000400,
247         0x9838, 0x0002021c, 0x00020200,
248         0xc78, 0x00000080, 0x00000000,
249         0xd030, 0x000300c0, 0x00800040,
250         0xd830, 0x000300c0, 0x00800040,
251         0x5bb0, 0x000000f0, 0x00000070,
252         0x5bc0, 0x00200000, 0x50100000,
253         0x7030, 0x31000311, 0x00000011,
254         0x2ae4, 0x00073ffe, 0x000022a2,
255         0x240c, 0x000007ff, 0x00000000,
256         0x8a14, 0xf000001f, 0x00000007,
257         0x8b24, 0xffffffff, 0x00ffffff,
258         0x8b10, 0x0000ff0f, 0x00000000,
259         0x28a4c, 0x07ffffff, 0x4e000000,
260         0x28350, 0x3f3f3fff, 0x00000082,
261         0x30, 0x000000ff, 0x0040,
262         0x34, 0x00000040, 0x00004040,
263         0x9100, 0x07ffffff, 0x03000000,
264         0x9060, 0x0000007f, 0x00000020,
265         0x9508, 0x00010000, 0x00010000,
266         0xac14, 0x000003ff, 0x000000f3,
267         0xac10, 0xffffffff, 0x00000000,
268         0xac0c, 0xffffffff, 0x00003210,
269         0x88d4, 0x0000001f, 0x00000010,
270         0x15c0, 0x000c0fc0, 0x000c0400
271 };
272
273 static const u32 hainan_golden_registers[] =
274 {
275         0x9a10, 0x00010000, 0x00018208,
276         0x9830, 0xffffffff, 0x00000000,
277         0x9834, 0xf00fffff, 0x00000400,
278         0x9838, 0x0002021c, 0x00020200,
279         0xd0c0, 0xff000fff, 0x00000100,
280         0xd030, 0x000300c0, 0x00800040,
281         0xd8c0, 0xff000fff, 0x00000100,
282         0xd830, 0x000300c0, 0x00800040,
283         0x2ae4, 0x00073ffe, 0x000022a2,
284         0x240c, 0x000007ff, 0x00000000,
285         0x8a14, 0xf000001f, 0x00000007,
286         0x8b24, 0xffffffff, 0x00ffffff,
287         0x8b10, 0x0000ff0f, 0x00000000,
288         0x28a4c, 0x07ffffff, 0x4e000000,
289         0x28350, 0x3f3f3fff, 0x00000000,
290         0x30, 0x000000ff, 0x0040,
291         0x34, 0x00000040, 0x00004040,
292         0x9100, 0x03e00000, 0x03600000,
293         0x9060, 0x0000007f, 0x00000020,
294         0x9508, 0x00010000, 0x00010000,
295         0xac14, 0x000003ff, 0x000000f1,
296         0xac10, 0xffffffff, 0x00000000,
297         0xac0c, 0xffffffff, 0x00003210,
298         0x88d4, 0x0000001f, 0x00000010,
299         0x15c0, 0x000c0fc0, 0x000c0400
300 };
301
302 static const u32 hainan_golden_registers2[] =
303 {
304         0x98f8, 0xffffffff, 0x02010001
305 };
306
307 static const u32 tahiti_mgcg_cgcg_init[] =
308 {
309         0xc400, 0xffffffff, 0xfffffffc,
310         0x802c, 0xffffffff, 0xe0000000,
311         0x9a60, 0xffffffff, 0x00000100,
312         0x92a4, 0xffffffff, 0x00000100,
313         0xc164, 0xffffffff, 0x00000100,
314         0x9774, 0xffffffff, 0x00000100,
315         0x8984, 0xffffffff, 0x06000100,
316         0x8a18, 0xffffffff, 0x00000100,
317         0x92a0, 0xffffffff, 0x00000100,
318         0xc380, 0xffffffff, 0x00000100,
319         0x8b28, 0xffffffff, 0x00000100,
320         0x9144, 0xffffffff, 0x00000100,
321         0x8d88, 0xffffffff, 0x00000100,
322         0x8d8c, 0xffffffff, 0x00000100,
323         0x9030, 0xffffffff, 0x00000100,
324         0x9034, 0xffffffff, 0x00000100,
325         0x9038, 0xffffffff, 0x00000100,
326         0x903c, 0xffffffff, 0x00000100,
327         0xad80, 0xffffffff, 0x00000100,
328         0xac54, 0xffffffff, 0x00000100,
329         0x897c, 0xffffffff, 0x06000100,
330         0x9868, 0xffffffff, 0x00000100,
331         0x9510, 0xffffffff, 0x00000100,
332         0xaf04, 0xffffffff, 0x00000100,
333         0xae04, 0xffffffff, 0x00000100,
334         0x949c, 0xffffffff, 0x00000100,
335         0x802c, 0xffffffff, 0xe0000000,
336         0x9160, 0xffffffff, 0x00010000,
337         0x9164, 0xffffffff, 0x00030002,
338         0x9168, 0xffffffff, 0x00040007,
339         0x916c, 0xffffffff, 0x00060005,
340         0x9170, 0xffffffff, 0x00090008,
341         0x9174, 0xffffffff, 0x00020001,
342         0x9178, 0xffffffff, 0x00040003,
343         0x917c, 0xffffffff, 0x00000007,
344         0x9180, 0xffffffff, 0x00060005,
345         0x9184, 0xffffffff, 0x00090008,
346         0x9188, 0xffffffff, 0x00030002,
347         0x918c, 0xffffffff, 0x00050004,
348         0x9190, 0xffffffff, 0x00000008,
349         0x9194, 0xffffffff, 0x00070006,
350         0x9198, 0xffffffff, 0x000a0009,
351         0x919c, 0xffffffff, 0x00040003,
352         0x91a0, 0xffffffff, 0x00060005,
353         0x91a4, 0xffffffff, 0x00000009,
354         0x91a8, 0xffffffff, 0x00080007,
355         0x91ac, 0xffffffff, 0x000b000a,
356         0x91b0, 0xffffffff, 0x00050004,
357         0x91b4, 0xffffffff, 0x00070006,
358         0x91b8, 0xffffffff, 0x0008000b,
359         0x91bc, 0xffffffff, 0x000a0009,
360         0x91c0, 0xffffffff, 0x000d000c,
361         0x91c4, 0xffffffff, 0x00060005,
362         0x91c8, 0xffffffff, 0x00080007,
363         0x91cc, 0xffffffff, 0x0000000b,
364         0x91d0, 0xffffffff, 0x000a0009,
365         0x91d4, 0xffffffff, 0x000d000c,
366         0x91d8, 0xffffffff, 0x00070006,
367         0x91dc, 0xffffffff, 0x00090008,
368         0x91e0, 0xffffffff, 0x0000000c,
369         0x91e4, 0xffffffff, 0x000b000a,
370         0x91e8, 0xffffffff, 0x000e000d,
371         0x91ec, 0xffffffff, 0x00080007,
372         0x91f0, 0xffffffff, 0x000a0009,
373         0x91f4, 0xffffffff, 0x0000000d,
374         0x91f8, 0xffffffff, 0x000c000b,
375         0x91fc, 0xffffffff, 0x000f000e,
376         0x9200, 0xffffffff, 0x00090008,
377         0x9204, 0xffffffff, 0x000b000a,
378         0x9208, 0xffffffff, 0x000c000f,
379         0x920c, 0xffffffff, 0x000e000d,
380         0x9210, 0xffffffff, 0x00110010,
381         0x9214, 0xffffffff, 0x000a0009,
382         0x9218, 0xffffffff, 0x000c000b,
383         0x921c, 0xffffffff, 0x0000000f,
384         0x9220, 0xffffffff, 0x000e000d,
385         0x9224, 0xffffffff, 0x00110010,
386         0x9228, 0xffffffff, 0x000b000a,
387         0x922c, 0xffffffff, 0x000d000c,
388         0x9230, 0xffffffff, 0x00000010,
389         0x9234, 0xffffffff, 0x000f000e,
390         0x9238, 0xffffffff, 0x00120011,
391         0x923c, 0xffffffff, 0x000c000b,
392         0x9240, 0xffffffff, 0x000e000d,
393         0x9244, 0xffffffff, 0x00000011,
394         0x9248, 0xffffffff, 0x0010000f,
395         0x924c, 0xffffffff, 0x00130012,
396         0x9250, 0xffffffff, 0x000d000c,
397         0x9254, 0xffffffff, 0x000f000e,
398         0x9258, 0xffffffff, 0x00100013,
399         0x925c, 0xffffffff, 0x00120011,
400         0x9260, 0xffffffff, 0x00150014,
401         0x9264, 0xffffffff, 0x000e000d,
402         0x9268, 0xffffffff, 0x0010000f,
403         0x926c, 0xffffffff, 0x00000013,
404         0x9270, 0xffffffff, 0x00120011,
405         0x9274, 0xffffffff, 0x00150014,
406         0x9278, 0xffffffff, 0x000f000e,
407         0x927c, 0xffffffff, 0x00110010,
408         0x9280, 0xffffffff, 0x00000014,
409         0x9284, 0xffffffff, 0x00130012,
410         0x9288, 0xffffffff, 0x00160015,
411         0x928c, 0xffffffff, 0x0010000f,
412         0x9290, 0xffffffff, 0x00120011,
413         0x9294, 0xffffffff, 0x00000015,
414         0x9298, 0xffffffff, 0x00140013,
415         0x929c, 0xffffffff, 0x00170016,
416         0x9150, 0xffffffff, 0x96940200,
417         0x8708, 0xffffffff, 0x00900100,
418         0xc478, 0xffffffff, 0x00000080,
419         0xc404, 0xffffffff, 0x0020003f,
420         0x30, 0xffffffff, 0x0000001c,
421         0x34, 0x000f0000, 0x000f0000,
422         0x160c, 0xffffffff, 0x00000100,
423         0x1024, 0xffffffff, 0x00000100,
424         0x102c, 0x00000101, 0x00000000,
425         0x20a8, 0xffffffff, 0x00000104,
426         0x264c, 0x000c0000, 0x000c0000,
427         0x2648, 0x000c0000, 0x000c0000,
428         0x55e4, 0xff000fff, 0x00000100,
429         0x55e8, 0x00000001, 0x00000001,
430         0x2f50, 0x00000001, 0x00000001,
431         0x30cc, 0xc0000fff, 0x00000104,
432         0xc1e4, 0x00000001, 0x00000001,
433         0xd0c0, 0xfffffff0, 0x00000100,
434         0xd8c0, 0xfffffff0, 0x00000100
435 };
436
437 static const u32 pitcairn_mgcg_cgcg_init[] =
438 {
439         0xc400, 0xffffffff, 0xfffffffc,
440         0x802c, 0xffffffff, 0xe0000000,
441         0x9a60, 0xffffffff, 0x00000100,
442         0x92a4, 0xffffffff, 0x00000100,
443         0xc164, 0xffffffff, 0x00000100,
444         0x9774, 0xffffffff, 0x00000100,
445         0x8984, 0xffffffff, 0x06000100,
446         0x8a18, 0xffffffff, 0x00000100,
447         0x92a0, 0xffffffff, 0x00000100,
448         0xc380, 0xffffffff, 0x00000100,
449         0x8b28, 0xffffffff, 0x00000100,
450         0x9144, 0xffffffff, 0x00000100,
451         0x8d88, 0xffffffff, 0x00000100,
452         0x8d8c, 0xffffffff, 0x00000100,
453         0x9030, 0xffffffff, 0x00000100,
454         0x9034, 0xffffffff, 0x00000100,
455         0x9038, 0xffffffff, 0x00000100,
456         0x903c, 0xffffffff, 0x00000100,
457         0xad80, 0xffffffff, 0x00000100,
458         0xac54, 0xffffffff, 0x00000100,
459         0x897c, 0xffffffff, 0x06000100,
460         0x9868, 0xffffffff, 0x00000100,
461         0x9510, 0xffffffff, 0x00000100,
462         0xaf04, 0xffffffff, 0x00000100,
463         0xae04, 0xffffffff, 0x00000100,
464         0x949c, 0xffffffff, 0x00000100,
465         0x802c, 0xffffffff, 0xe0000000,
466         0x9160, 0xffffffff, 0x00010000,
467         0x9164, 0xffffffff, 0x00030002,
468         0x9168, 0xffffffff, 0x00040007,
469         0x916c, 0xffffffff, 0x00060005,
470         0x9170, 0xffffffff, 0x00090008,
471         0x9174, 0xffffffff, 0x00020001,
472         0x9178, 0xffffffff, 0x00040003,
473         0x917c, 0xffffffff, 0x00000007,
474         0x9180, 0xffffffff, 0x00060005,
475         0x9184, 0xffffffff, 0x00090008,
476         0x9188, 0xffffffff, 0x00030002,
477         0x918c, 0xffffffff, 0x00050004,
478         0x9190, 0xffffffff, 0x00000008,
479         0x9194, 0xffffffff, 0x00070006,
480         0x9198, 0xffffffff, 0x000a0009,
481         0x919c, 0xffffffff, 0x00040003,
482         0x91a0, 0xffffffff, 0x00060005,
483         0x91a4, 0xffffffff, 0x00000009,
484         0x91a8, 0xffffffff, 0x00080007,
485         0x91ac, 0xffffffff, 0x000b000a,
486         0x91b0, 0xffffffff, 0x00050004,
487         0x91b4, 0xffffffff, 0x00070006,
488         0x91b8, 0xffffffff, 0x0008000b,
489         0x91bc, 0xffffffff, 0x000a0009,
490         0x91c0, 0xffffffff, 0x000d000c,
491         0x9200, 0xffffffff, 0x00090008,
492         0x9204, 0xffffffff, 0x000b000a,
493         0x9208, 0xffffffff, 0x000c000f,
494         0x920c, 0xffffffff, 0x000e000d,
495         0x9210, 0xffffffff, 0x00110010,
496         0x9214, 0xffffffff, 0x000a0009,
497         0x9218, 0xffffffff, 0x000c000b,
498         0x921c, 0xffffffff, 0x0000000f,
499         0x9220, 0xffffffff, 0x000e000d,
500         0x9224, 0xffffffff, 0x00110010,
501         0x9228, 0xffffffff, 0x000b000a,
502         0x922c, 0xffffffff, 0x000d000c,
503         0x9230, 0xffffffff, 0x00000010,
504         0x9234, 0xffffffff, 0x000f000e,
505         0x9238, 0xffffffff, 0x00120011,
506         0x923c, 0xffffffff, 0x000c000b,
507         0x9240, 0xffffffff, 0x000e000d,
508         0x9244, 0xffffffff, 0x00000011,
509         0x9248, 0xffffffff, 0x0010000f,
510         0x924c, 0xffffffff, 0x00130012,
511         0x9250, 0xffffffff, 0x000d000c,
512         0x9254, 0xffffffff, 0x000f000e,
513         0x9258, 0xffffffff, 0x00100013,
514         0x925c, 0xffffffff, 0x00120011,
515         0x9260, 0xffffffff, 0x00150014,
516         0x9150, 0xffffffff, 0x96940200,
517         0x8708, 0xffffffff, 0x00900100,
518         0xc478, 0xffffffff, 0x00000080,
519         0xc404, 0xffffffff, 0x0020003f,
520         0x30, 0xffffffff, 0x0000001c,
521         0x34, 0x000f0000, 0x000f0000,
522         0x160c, 0xffffffff, 0x00000100,
523         0x1024, 0xffffffff, 0x00000100,
524         0x102c, 0x00000101, 0x00000000,
525         0x20a8, 0xffffffff, 0x00000104,
526         0x55e4, 0xff000fff, 0x00000100,
527         0x55e8, 0x00000001, 0x00000001,
528         0x2f50, 0x00000001, 0x00000001,
529         0x30cc, 0xc0000fff, 0x00000104,
530         0xc1e4, 0x00000001, 0x00000001,
531         0xd0c0, 0xfffffff0, 0x00000100,
532         0xd8c0, 0xfffffff0, 0x00000100
533 };
534
535 static const u32 verde_mgcg_cgcg_init[] =
536 {
537         0xc400, 0xffffffff, 0xfffffffc,
538         0x802c, 0xffffffff, 0xe0000000,
539         0x9a60, 0xffffffff, 0x00000100,
540         0x92a4, 0xffffffff, 0x00000100,
541         0xc164, 0xffffffff, 0x00000100,
542         0x9774, 0xffffffff, 0x00000100,
543         0x8984, 0xffffffff, 0x06000100,
544         0x8a18, 0xffffffff, 0x00000100,
545         0x92a0, 0xffffffff, 0x00000100,
546         0xc380, 0xffffffff, 0x00000100,
547         0x8b28, 0xffffffff, 0x00000100,
548         0x9144, 0xffffffff, 0x00000100,
549         0x8d88, 0xffffffff, 0x00000100,
550         0x8d8c, 0xffffffff, 0x00000100,
551         0x9030, 0xffffffff, 0x00000100,
552         0x9034, 0xffffffff, 0x00000100,
553         0x9038, 0xffffffff, 0x00000100,
554         0x903c, 0xffffffff, 0x00000100,
555         0xad80, 0xffffffff, 0x00000100,
556         0xac54, 0xffffffff, 0x00000100,
557         0x897c, 0xffffffff, 0x06000100,
558         0x9868, 0xffffffff, 0x00000100,
559         0x9510, 0xffffffff, 0x00000100,
560         0xaf04, 0xffffffff, 0x00000100,
561         0xae04, 0xffffffff, 0x00000100,
562         0x949c, 0xffffffff, 0x00000100,
563         0x802c, 0xffffffff, 0xe0000000,
564         0x9160, 0xffffffff, 0x00010000,
565         0x9164, 0xffffffff, 0x00030002,
566         0x9168, 0xffffffff, 0x00040007,
567         0x916c, 0xffffffff, 0x00060005,
568         0x9170, 0xffffffff, 0x00090008,
569         0x9174, 0xffffffff, 0x00020001,
570         0x9178, 0xffffffff, 0x00040003,
571         0x917c, 0xffffffff, 0x00000007,
572         0x9180, 0xffffffff, 0x00060005,
573         0x9184, 0xffffffff, 0x00090008,
574         0x9188, 0xffffffff, 0x00030002,
575         0x918c, 0xffffffff, 0x00050004,
576         0x9190, 0xffffffff, 0x00000008,
577         0x9194, 0xffffffff, 0x00070006,
578         0x9198, 0xffffffff, 0x000a0009,
579         0x919c, 0xffffffff, 0x00040003,
580         0x91a0, 0xffffffff, 0x00060005,
581         0x91a4, 0xffffffff, 0x00000009,
582         0x91a8, 0xffffffff, 0x00080007,
583         0x91ac, 0xffffffff, 0x000b000a,
584         0x91b0, 0xffffffff, 0x00050004,
585         0x91b4, 0xffffffff, 0x00070006,
586         0x91b8, 0xffffffff, 0x0008000b,
587         0x91bc, 0xffffffff, 0x000a0009,
588         0x91c0, 0xffffffff, 0x000d000c,
589         0x9200, 0xffffffff, 0x00090008,
590         0x9204, 0xffffffff, 0x000b000a,
591         0x9208, 0xffffffff, 0x000c000f,
592         0x920c, 0xffffffff, 0x000e000d,
593         0x9210, 0xffffffff, 0x00110010,
594         0x9214, 0xffffffff, 0x000a0009,
595         0x9218, 0xffffffff, 0x000c000b,
596         0x921c, 0xffffffff, 0x0000000f,
597         0x9220, 0xffffffff, 0x000e000d,
598         0x9224, 0xffffffff, 0x00110010,
599         0x9228, 0xffffffff, 0x000b000a,
600         0x922c, 0xffffffff, 0x000d000c,
601         0x9230, 0xffffffff, 0x00000010,
602         0x9234, 0xffffffff, 0x000f000e,
603         0x9238, 0xffffffff, 0x00120011,
604         0x923c, 0xffffffff, 0x000c000b,
605         0x9240, 0xffffffff, 0x000e000d,
606         0x9244, 0xffffffff, 0x00000011,
607         0x9248, 0xffffffff, 0x0010000f,
608         0x924c, 0xffffffff, 0x00130012,
609         0x9250, 0xffffffff, 0x000d000c,
610         0x9254, 0xffffffff, 0x000f000e,
611         0x9258, 0xffffffff, 0x00100013,
612         0x925c, 0xffffffff, 0x00120011,
613         0x9260, 0xffffffff, 0x00150014,
614         0x9150, 0xffffffff, 0x96940200,
615         0x8708, 0xffffffff, 0x00900100,
616         0xc478, 0xffffffff, 0x00000080,
617         0xc404, 0xffffffff, 0x0020003f,
618         0x30, 0xffffffff, 0x0000001c,
619         0x34, 0x000f0000, 0x000f0000,
620         0x160c, 0xffffffff, 0x00000100,
621         0x1024, 0xffffffff, 0x00000100,
622         0x102c, 0x00000101, 0x00000000,
623         0x20a8, 0xffffffff, 0x00000104,
624         0x264c, 0x000c0000, 0x000c0000,
625         0x2648, 0x000c0000, 0x000c0000,
626         0x55e4, 0xff000fff, 0x00000100,
627         0x55e8, 0x00000001, 0x00000001,
628         0x2f50, 0x00000001, 0x00000001,
629         0x30cc, 0xc0000fff, 0x00000104,
630         0xc1e4, 0x00000001, 0x00000001,
631         0xd0c0, 0xfffffff0, 0x00000100,
632         0xd8c0, 0xfffffff0, 0x00000100
633 };
634
635 static const u32 oland_mgcg_cgcg_init[] =
636 {
637         0xc400, 0xffffffff, 0xfffffffc,
638         0x802c, 0xffffffff, 0xe0000000,
639         0x9a60, 0xffffffff, 0x00000100,
640         0x92a4, 0xffffffff, 0x00000100,
641         0xc164, 0xffffffff, 0x00000100,
642         0x9774, 0xffffffff, 0x00000100,
643         0x8984, 0xffffffff, 0x06000100,
644         0x8a18, 0xffffffff, 0x00000100,
645         0x92a0, 0xffffffff, 0x00000100,
646         0xc380, 0xffffffff, 0x00000100,
647         0x8b28, 0xffffffff, 0x00000100,
648         0x9144, 0xffffffff, 0x00000100,
649         0x8d88, 0xffffffff, 0x00000100,
650         0x8d8c, 0xffffffff, 0x00000100,
651         0x9030, 0xffffffff, 0x00000100,
652         0x9034, 0xffffffff, 0x00000100,
653         0x9038, 0xffffffff, 0x00000100,
654         0x903c, 0xffffffff, 0x00000100,
655         0xad80, 0xffffffff, 0x00000100,
656         0xac54, 0xffffffff, 0x00000100,
657         0x897c, 0xffffffff, 0x06000100,
658         0x9868, 0xffffffff, 0x00000100,
659         0x9510, 0xffffffff, 0x00000100,
660         0xaf04, 0xffffffff, 0x00000100,
661         0xae04, 0xffffffff, 0x00000100,
662         0x949c, 0xffffffff, 0x00000100,
663         0x802c, 0xffffffff, 0xe0000000,
664         0x9160, 0xffffffff, 0x00010000,
665         0x9164, 0xffffffff, 0x00030002,
666         0x9168, 0xffffffff, 0x00040007,
667         0x916c, 0xffffffff, 0x00060005,
668         0x9170, 0xffffffff, 0x00090008,
669         0x9174, 0xffffffff, 0x00020001,
670         0x9178, 0xffffffff, 0x00040003,
671         0x917c, 0xffffffff, 0x00000007,
672         0x9180, 0xffffffff, 0x00060005,
673         0x9184, 0xffffffff, 0x00090008,
674         0x9188, 0xffffffff, 0x00030002,
675         0x918c, 0xffffffff, 0x00050004,
676         0x9190, 0xffffffff, 0x00000008,
677         0x9194, 0xffffffff, 0x00070006,
678         0x9198, 0xffffffff, 0x000a0009,
679         0x919c, 0xffffffff, 0x00040003,
680         0x91a0, 0xffffffff, 0x00060005,
681         0x91a4, 0xffffffff, 0x00000009,
682         0x91a8, 0xffffffff, 0x00080007,
683         0x91ac, 0xffffffff, 0x000b000a,
684         0x91b0, 0xffffffff, 0x00050004,
685         0x91b4, 0xffffffff, 0x00070006,
686         0x91b8, 0xffffffff, 0x0008000b,
687         0x91bc, 0xffffffff, 0x000a0009,
688         0x91c0, 0xffffffff, 0x000d000c,
689         0x91c4, 0xffffffff, 0x00060005,
690         0x91c8, 0xffffffff, 0x00080007,
691         0x91cc, 0xffffffff, 0x0000000b,
692         0x91d0, 0xffffffff, 0x000a0009,
693         0x91d4, 0xffffffff, 0x000d000c,
694         0x9150, 0xffffffff, 0x96940200,
695         0x8708, 0xffffffff, 0x00900100,
696         0xc478, 0xffffffff, 0x00000080,
697         0xc404, 0xffffffff, 0x0020003f,
698         0x30, 0xffffffff, 0x0000001c,
699         0x34, 0x000f0000, 0x000f0000,
700         0x160c, 0xffffffff, 0x00000100,
701         0x1024, 0xffffffff, 0x00000100,
702         0x102c, 0x00000101, 0x00000000,
703         0x20a8, 0xffffffff, 0x00000104,
704         0x264c, 0x000c0000, 0x000c0000,
705         0x2648, 0x000c0000, 0x000c0000,
706         0x55e4, 0xff000fff, 0x00000100,
707         0x55e8, 0x00000001, 0x00000001,
708         0x2f50, 0x00000001, 0x00000001,
709         0x30cc, 0xc0000fff, 0x00000104,
710         0xc1e4, 0x00000001, 0x00000001,
711         0xd0c0, 0xfffffff0, 0x00000100,
712         0xd8c0, 0xfffffff0, 0x00000100
713 };
714
715 static const u32 hainan_mgcg_cgcg_init[] =
716 {
717         0xc400, 0xffffffff, 0xfffffffc,
718         0x802c, 0xffffffff, 0xe0000000,
719         0x9a60, 0xffffffff, 0x00000100,
720         0x92a4, 0xffffffff, 0x00000100,
721         0xc164, 0xffffffff, 0x00000100,
722         0x9774, 0xffffffff, 0x00000100,
723         0x8984, 0xffffffff, 0x06000100,
724         0x8a18, 0xffffffff, 0x00000100,
725         0x92a0, 0xffffffff, 0x00000100,
726         0xc380, 0xffffffff, 0x00000100,
727         0x8b28, 0xffffffff, 0x00000100,
728         0x9144, 0xffffffff, 0x00000100,
729         0x8d88, 0xffffffff, 0x00000100,
730         0x8d8c, 0xffffffff, 0x00000100,
731         0x9030, 0xffffffff, 0x00000100,
732         0x9034, 0xffffffff, 0x00000100,
733         0x9038, 0xffffffff, 0x00000100,
734         0x903c, 0xffffffff, 0x00000100,
735         0xad80, 0xffffffff, 0x00000100,
736         0xac54, 0xffffffff, 0x00000100,
737         0x897c, 0xffffffff, 0x06000100,
738         0x9868, 0xffffffff, 0x00000100,
739         0x9510, 0xffffffff, 0x00000100,
740         0xaf04, 0xffffffff, 0x00000100,
741         0xae04, 0xffffffff, 0x00000100,
742         0x949c, 0xffffffff, 0x00000100,
743         0x802c, 0xffffffff, 0xe0000000,
744         0x9160, 0xffffffff, 0x00010000,
745         0x9164, 0xffffffff, 0x00030002,
746         0x9168, 0xffffffff, 0x00040007,
747         0x916c, 0xffffffff, 0x00060005,
748         0x9170, 0xffffffff, 0x00090008,
749         0x9174, 0xffffffff, 0x00020001,
750         0x9178, 0xffffffff, 0x00040003,
751         0x917c, 0xffffffff, 0x00000007,
752         0x9180, 0xffffffff, 0x00060005,
753         0x9184, 0xffffffff, 0x00090008,
754         0x9188, 0xffffffff, 0x00030002,
755         0x918c, 0xffffffff, 0x00050004,
756         0x9190, 0xffffffff, 0x00000008,
757         0x9194, 0xffffffff, 0x00070006,
758         0x9198, 0xffffffff, 0x000a0009,
759         0x919c, 0xffffffff, 0x00040003,
760         0x91a0, 0xffffffff, 0x00060005,
761         0x91a4, 0xffffffff, 0x00000009,
762         0x91a8, 0xffffffff, 0x00080007,
763         0x91ac, 0xffffffff, 0x000b000a,
764         0x91b0, 0xffffffff, 0x00050004,
765         0x91b4, 0xffffffff, 0x00070006,
766         0x91b8, 0xffffffff, 0x0008000b,
767         0x91bc, 0xffffffff, 0x000a0009,
768         0x91c0, 0xffffffff, 0x000d000c,
769         0x91c4, 0xffffffff, 0x00060005,
770         0x91c8, 0xffffffff, 0x00080007,
771         0x91cc, 0xffffffff, 0x0000000b,
772         0x91d0, 0xffffffff, 0x000a0009,
773         0x91d4, 0xffffffff, 0x000d000c,
774         0x9150, 0xffffffff, 0x96940200,
775         0x8708, 0xffffffff, 0x00900100,
776         0xc478, 0xffffffff, 0x00000080,
777         0xc404, 0xffffffff, 0x0020003f,
778         0x30, 0xffffffff, 0x0000001c,
779         0x34, 0x000f0000, 0x000f0000,
780         0x160c, 0xffffffff, 0x00000100,
781         0x1024, 0xffffffff, 0x00000100,
782         0x20a8, 0xffffffff, 0x00000104,
783         0x264c, 0x000c0000, 0x000c0000,
784         0x2648, 0x000c0000, 0x000c0000,
785         0x2f50, 0x00000001, 0x00000001,
786         0x30cc, 0xc0000fff, 0x00000104,
787         0xc1e4, 0x00000001, 0x00000001,
788         0xd0c0, 0xfffffff0, 0x00000100,
789         0xd8c0, 0xfffffff0, 0x00000100
790 };
791
792 static u32 verde_pg_init[] =
793 {
794         0x353c, 0xffffffff, 0x40000,
795         0x3538, 0xffffffff, 0x200010ff,
796         0x353c, 0xffffffff, 0x0,
797         0x353c, 0xffffffff, 0x0,
798         0x353c, 0xffffffff, 0x0,
799         0x353c, 0xffffffff, 0x0,
800         0x353c, 0xffffffff, 0x0,
801         0x353c, 0xffffffff, 0x7007,
802         0x3538, 0xffffffff, 0x300010ff,
803         0x353c, 0xffffffff, 0x0,
804         0x353c, 0xffffffff, 0x0,
805         0x353c, 0xffffffff, 0x0,
806         0x353c, 0xffffffff, 0x0,
807         0x353c, 0xffffffff, 0x0,
808         0x353c, 0xffffffff, 0x400000,
809         0x3538, 0xffffffff, 0x100010ff,
810         0x353c, 0xffffffff, 0x0,
811         0x353c, 0xffffffff, 0x0,
812         0x353c, 0xffffffff, 0x0,
813         0x353c, 0xffffffff, 0x0,
814         0x353c, 0xffffffff, 0x0,
815         0x353c, 0xffffffff, 0x120200,
816         0x3538, 0xffffffff, 0x500010ff,
817         0x353c, 0xffffffff, 0x0,
818         0x353c, 0xffffffff, 0x0,
819         0x353c, 0xffffffff, 0x0,
820         0x353c, 0xffffffff, 0x0,
821         0x353c, 0xffffffff, 0x0,
822         0x353c, 0xffffffff, 0x1e1e16,
823         0x3538, 0xffffffff, 0x600010ff,
824         0x353c, 0xffffffff, 0x0,
825         0x353c, 0xffffffff, 0x0,
826         0x353c, 0xffffffff, 0x0,
827         0x353c, 0xffffffff, 0x0,
828         0x353c, 0xffffffff, 0x0,
829         0x353c, 0xffffffff, 0x171f1e,
830         0x3538, 0xffffffff, 0x700010ff,
831         0x353c, 0xffffffff, 0x0,
832         0x353c, 0xffffffff, 0x0,
833         0x353c, 0xffffffff, 0x0,
834         0x353c, 0xffffffff, 0x0,
835         0x353c, 0xffffffff, 0x0,
836         0x353c, 0xffffffff, 0x0,
837         0x3538, 0xffffffff, 0x9ff,
838         0x3500, 0xffffffff, 0x0,
839         0x3504, 0xffffffff, 0x10000800,
840         0x3504, 0xffffffff, 0xf,
841         0x3504, 0xffffffff, 0xf,
842         0x3500, 0xffffffff, 0x4,
843         0x3504, 0xffffffff, 0x1000051e,
844         0x3504, 0xffffffff, 0xffff,
845         0x3504, 0xffffffff, 0xffff,
846         0x3500, 0xffffffff, 0x8,
847         0x3504, 0xffffffff, 0x80500,
848         0x3500, 0xffffffff, 0x12,
849         0x3504, 0xffffffff, 0x9050c,
850         0x3500, 0xffffffff, 0x1d,
851         0x3504, 0xffffffff, 0xb052c,
852         0x3500, 0xffffffff, 0x2a,
853         0x3504, 0xffffffff, 0x1053e,
854         0x3500, 0xffffffff, 0x2d,
855         0x3504, 0xffffffff, 0x10546,
856         0x3500, 0xffffffff, 0x30,
857         0x3504, 0xffffffff, 0xa054e,
858         0x3500, 0xffffffff, 0x3c,
859         0x3504, 0xffffffff, 0x1055f,
860         0x3500, 0xffffffff, 0x3f,
861         0x3504, 0xffffffff, 0x10567,
862         0x3500, 0xffffffff, 0x42,
863         0x3504, 0xffffffff, 0x1056f,
864         0x3500, 0xffffffff, 0x45,
865         0x3504, 0xffffffff, 0x10572,
866         0x3500, 0xffffffff, 0x48,
867         0x3504, 0xffffffff, 0x20575,
868         0x3500, 0xffffffff, 0x4c,
869         0x3504, 0xffffffff, 0x190801,
870         0x3500, 0xffffffff, 0x67,
871         0x3504, 0xffffffff, 0x1082a,
872         0x3500, 0xffffffff, 0x6a,
873         0x3504, 0xffffffff, 0x1b082d,
874         0x3500, 0xffffffff, 0x87,
875         0x3504, 0xffffffff, 0x310851,
876         0x3500, 0xffffffff, 0xba,
877         0x3504, 0xffffffff, 0x891,
878         0x3500, 0xffffffff, 0xbc,
879         0x3504, 0xffffffff, 0x893,
880         0x3500, 0xffffffff, 0xbe,
881         0x3504, 0xffffffff, 0x20895,
882         0x3500, 0xffffffff, 0xc2,
883         0x3504, 0xffffffff, 0x20899,
884         0x3500, 0xffffffff, 0xc6,
885         0x3504, 0xffffffff, 0x2089d,
886         0x3500, 0xffffffff, 0xca,
887         0x3504, 0xffffffff, 0x8a1,
888         0x3500, 0xffffffff, 0xcc,
889         0x3504, 0xffffffff, 0x8a3,
890         0x3500, 0xffffffff, 0xce,
891         0x3504, 0xffffffff, 0x308a5,
892         0x3500, 0xffffffff, 0xd3,
893         0x3504, 0xffffffff, 0x6d08cd,
894         0x3500, 0xffffffff, 0x142,
895         0x3504, 0xffffffff, 0x2000095a,
896         0x3504, 0xffffffff, 0x1,
897         0x3500, 0xffffffff, 0x144,
898         0x3504, 0xffffffff, 0x301f095b,
899         0x3500, 0xffffffff, 0x165,
900         0x3504, 0xffffffff, 0xc094d,
901         0x3500, 0xffffffff, 0x173,
902         0x3504, 0xffffffff, 0xf096d,
903         0x3500, 0xffffffff, 0x184,
904         0x3504, 0xffffffff, 0x15097f,
905         0x3500, 0xffffffff, 0x19b,
906         0x3504, 0xffffffff, 0xc0998,
907         0x3500, 0xffffffff, 0x1a9,
908         0x3504, 0xffffffff, 0x409a7,
909         0x3500, 0xffffffff, 0x1af,
910         0x3504, 0xffffffff, 0xcdc,
911         0x3500, 0xffffffff, 0x1b1,
912         0x3504, 0xffffffff, 0x800,
913         0x3508, 0xffffffff, 0x6c9b2000,
914         0x3510, 0xfc00, 0x2000,
915         0x3544, 0xffffffff, 0xfc0,
916         0x28d4, 0x00000100, 0x100
917 };
918
919 static void si_init_golden_registers(struct radeon_device *rdev)
920 {
921         switch (rdev->family) {
922         case CHIP_TAHITI:
923                 radeon_program_register_sequence(rdev,
924                                                  tahiti_golden_registers,
925                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
926                 radeon_program_register_sequence(rdev,
927                                                  tahiti_golden_rlc_registers,
928                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
929                 radeon_program_register_sequence(rdev,
930                                                  tahiti_mgcg_cgcg_init,
931                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
932                 radeon_program_register_sequence(rdev,
933                                                  tahiti_golden_registers2,
934                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
935                 break;
936         case CHIP_PITCAIRN:
937                 radeon_program_register_sequence(rdev,
938                                                  pitcairn_golden_registers,
939                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
940                 radeon_program_register_sequence(rdev,
941                                                  pitcairn_golden_rlc_registers,
942                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
943                 radeon_program_register_sequence(rdev,
944                                                  pitcairn_mgcg_cgcg_init,
945                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
946                 break;
947         case CHIP_VERDE:
948                 radeon_program_register_sequence(rdev,
949                                                  verde_golden_registers,
950                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
951                 radeon_program_register_sequence(rdev,
952                                                  verde_golden_rlc_registers,
953                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
954                 radeon_program_register_sequence(rdev,
955                                                  verde_mgcg_cgcg_init,
956                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
957                 radeon_program_register_sequence(rdev,
958                                                  verde_pg_init,
959                                                  (const u32)ARRAY_SIZE(verde_pg_init));
960                 break;
961         case CHIP_OLAND:
962                 radeon_program_register_sequence(rdev,
963                                                  oland_golden_registers,
964                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
965                 radeon_program_register_sequence(rdev,
966                                                  oland_golden_rlc_registers,
967                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
968                 radeon_program_register_sequence(rdev,
969                                                  oland_mgcg_cgcg_init,
970                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
971                 break;
972         case CHIP_HAINAN:
973                 radeon_program_register_sequence(rdev,
974                                                  hainan_golden_registers,
975                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
976                 radeon_program_register_sequence(rdev,
977                                                  hainan_golden_registers2,
978                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
979                 radeon_program_register_sequence(rdev,
980                                                  hainan_mgcg_cgcg_init,
981                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
982                 break;
983         default:
984                 break;
985         }
986 }
987
/* Fixed reference clock values used by si_get_xclk() when the
 * TCLK-to-XCLK mux is selected (units match reference_freq; see below).
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
990
991 /**
992  * si_get_xclk - get the xclk
993  *
994  * @rdev: radeon_device pointer
995  *
996  * Returns the reference clock used by the gfx engine
997  * (SI).
998  */
999 u32 si_get_xclk(struct radeon_device *rdev)
1000 {
1001         u32 reference_clock = rdev->clock.spll.reference_freq;
1002         u32 tmp;
1003
1004         tmp = RREG32(CG_CLKPIN_CNTL_2);
1005         if (tmp & MUX_TCLK_TO_XCLK)
1006                 return TCLK;
1007
1008         tmp = RREG32(CG_CLKPIN_CNTL);
1009         if (tmp & XTALIN_DIVIDE)
1010                 return reference_clock / 4;
1011
1012         return reference_clock;
1013 }
1014
1015 /* get temperature in millidegrees */
1016 int si_get_temp(struct radeon_device *rdev)
1017 {
1018         u32 temp;
1019         int actual_temp = 0;
1020
1021         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1022                 CTF_TEMP_SHIFT;
1023
1024         if (temp & 0x200)
1025                 actual_temp = 255;
1026         else
1027                 actual_temp = temp & 0x1ff;
1028
1029         actual_temp = (actual_temp * 1000);
1030
1031         return actual_temp;
1032 }
1033
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs in each
 * of the io_mc_regs tables below (all families share the same size).
 */
#define TAHITI_IO_MC_REGS_SIZE 36

/* MC io register init table for Tahiti, programmed by
 * si_mc_load_microcode() before the MC ucode is uploaded.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1074
/* MC io register init table for Pitcairn (see si_mc_load_microcode()). */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1113
/* MC io register init table for Verde (see si_mc_load_microcode()). */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1152
/* MC io register init table for Oland (see si_mc_load_microcode()). */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1191
/* MC io register init table for Hainan (see si_mc_load_microcode()). */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1230
1231 /* ucode loading */
1232 static int si_mc_load_microcode(struct radeon_device *rdev)
1233 {
1234         const __be32 *fw_data;
1235         u32 running, blackout = 0;
1236         u32 *io_mc_regs;
1237         int i, ucode_size, regs_size;
1238
1239         if (!rdev->mc_fw)
1240                 return -EINVAL;
1241
1242         switch (rdev->family) {
1243         case CHIP_TAHITI:
1244                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1245                 ucode_size = SI_MC_UCODE_SIZE;
1246                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1247                 break;
1248         case CHIP_PITCAIRN:
1249                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1250                 ucode_size = SI_MC_UCODE_SIZE;
1251                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1252                 break;
1253         case CHIP_VERDE:
1254         default:
1255                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1256                 ucode_size = SI_MC_UCODE_SIZE;
1257                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1258                 break;
1259         case CHIP_OLAND:
1260                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1261                 ucode_size = OLAND_MC_UCODE_SIZE;
1262                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1263                 break;
1264         case CHIP_HAINAN:
1265                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1266                 ucode_size = OLAND_MC_UCODE_SIZE;
1267                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1268                 break;
1269         }
1270
1271         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1272
1273         if (running == 0) {
1274                 if (running) {
1275                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1276                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1277                 }
1278
1279                 /* reset the engine and set to writable */
1280                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1281                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1282
1283                 /* load mc io regs */
1284                 for (i = 0; i < regs_size; i++) {
1285                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1286                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1287                 }
1288                 /* load the MC ucode */
1289                 fw_data = (const __be32 *)rdev->mc_fw->data;
1290                 for (i = 0; i < ucode_size; i++)
1291                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1292
1293                 /* put the engine back into the active state */
1294                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1295                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1296                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1297
1298                 /* wait for training to complete */
1299                 for (i = 0; i < rdev->usec_timeout; i++) {
1300                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1301                                 break;
1302                         udelay(1);
1303                 }
1304                 for (i = 0; i < rdev->usec_timeout; i++) {
1305                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1306                                 break;
1307                         udelay(1);
1308                 }
1309
1310                 if (running)
1311                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1312         }
1313
1314         return 0;
1315 }
1316
1317 static int si_init_microcode(struct radeon_device *rdev)
1318 {
1319         struct platform_device *pdev;
1320         const char *chip_name;
1321         const char *rlc_chip_name;
1322         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1323         char fw_name[30];
1324         int err;
1325
1326         DRM_DEBUG("\n");
1327
1328         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1329         err = IS_ERR(pdev);
1330         if (err) {
1331                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1332                 return -EINVAL;
1333         }
1334
1335         switch (rdev->family) {
1336         case CHIP_TAHITI:
1337                 chip_name = "TAHITI";
1338                 rlc_chip_name = "TAHITI";
1339                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1340                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1341                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1342                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1343                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1344                 break;
1345         case CHIP_PITCAIRN:
1346                 chip_name = "PITCAIRN";
1347                 rlc_chip_name = "PITCAIRN";
1348                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1349                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1350                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1351                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1352                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1353                 break;
1354         case CHIP_VERDE:
1355                 chip_name = "VERDE";
1356                 rlc_chip_name = "VERDE";
1357                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1358                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1359                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1360                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1361                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1362                 break;
1363         case CHIP_OLAND:
1364                 chip_name = "OLAND";
1365                 rlc_chip_name = "OLAND";
1366                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1367                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1368                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1369                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1370                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1371                 break;
1372         case CHIP_HAINAN:
1373                 chip_name = "HAINAN";
1374                 rlc_chip_name = "HAINAN";
1375                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1376                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1377                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1378                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1379                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1380                 break;
1381         default: BUG();
1382         }
1383
1384         DRM_INFO("Loading %s Microcode\n", chip_name);
1385
1386         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1387         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1388         if (err)
1389                 goto out;
1390         if (rdev->pfp_fw->size != pfp_req_size) {
1391                 printk(KERN_ERR
1392                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1393                        rdev->pfp_fw->size, fw_name);
1394                 err = -EINVAL;
1395                 goto out;
1396         }
1397
1398         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1399         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1400         if (err)
1401                 goto out;
1402         if (rdev->me_fw->size != me_req_size) {
1403                 printk(KERN_ERR
1404                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1405                        rdev->me_fw->size, fw_name);
1406                 err = -EINVAL;
1407         }
1408
1409         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1410         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1411         if (err)
1412                 goto out;
1413         if (rdev->ce_fw->size != ce_req_size) {
1414                 printk(KERN_ERR
1415                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1416                        rdev->ce_fw->size, fw_name);
1417                 err = -EINVAL;
1418         }
1419
1420         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1421         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1422         if (err)
1423                 goto out;
1424         if (rdev->rlc_fw->size != rlc_req_size) {
1425                 printk(KERN_ERR
1426                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1427                        rdev->rlc_fw->size, fw_name);
1428                 err = -EINVAL;
1429         }
1430
1431         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1432         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1433         if (err)
1434                 goto out;
1435         if (rdev->mc_fw->size != mc_req_size) {
1436                 printk(KERN_ERR
1437                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1438                        rdev->mc_fw->size, fw_name);
1439                 err = -EINVAL;
1440         }
1441
1442 out:
1443         platform_device_unregister(pdev);
1444
1445         if (err) {
1446                 if (err != -EINVAL)
1447                         printk(KERN_ERR
1448                                "si_cp: Failed to load firmware \"%s\"\n",
1449                                fw_name);
1450                 release_firmware(rdev->pfp_fw);
1451                 rdev->pfp_fw = NULL;
1452                 release_firmware(rdev->me_fw);
1453                 rdev->me_fw = NULL;
1454                 release_firmware(rdev->ce_fw);
1455                 rdev->ce_fw = NULL;
1456                 release_firmware(rdev->rlc_fw);
1457                 rdev->rlc_fw = NULL;
1458                 release_firmware(rdev->mc_fw);
1459                 rdev->mc_fw = NULL;
1460         }
1461         return err;
1462 }
1463
1464 /* watermark setup */
1465 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1466                                    struct radeon_crtc *radeon_crtc,
1467                                    struct drm_display_mode *mode,
1468                                    struct drm_display_mode *other_mode)
1469 {
1470         u32 tmp, buffer_alloc, i;
1471         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1472         /*
1473          * Line Buffer Setup
1474          * There are 3 line buffers, each one shared by 2 display controllers.
1475          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1476          * the display controllers.  The paritioning is done via one of four
1477          * preset allocations specified in bits 21:20:
1478          *  0 - half lb
1479          *  2 - whole lb, other crtc must be disabled
1480          */
1481         /* this can get tricky if we have two large displays on a paired group
1482          * of crtcs.  Ideally for multiple large displays we'd assign them to
1483          * non-linked crtcs for maximum line buffer allocation.
1484          */
1485         if (radeon_crtc->base.enabled && mode) {
1486                 if (other_mode) {
1487                         tmp = 0; /* 1/2 */
1488                         buffer_alloc = 1;
1489                 } else {
1490                         tmp = 2; /* whole */
1491                         buffer_alloc = 2;
1492                 }
1493         } else {
1494                 tmp = 0;
1495                 buffer_alloc = 0;
1496         }
1497
1498         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1499                DC_LB_MEMORY_CONFIG(tmp));
1500
1501         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1502                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1503         for (i = 0; i < rdev->usec_timeout; i++) {
1504                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1505                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1506                         break;
1507                 udelay(1);
1508         }
1509
1510         if (radeon_crtc->base.enabled && mode) {
1511                 switch (tmp) {
1512                 case 0:
1513                 default:
1514                         return 4096 * 2;
1515                 case 2:
1516                         return 8192 * 2;
1517                 }
1518         }
1519
1520         /* controller not enabled, so no lb used */
1521         return 0;
1522 }
1523
1524 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1525 {
1526         u32 tmp = RREG32(MC_SHARED_CHMAP);
1527
1528         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1529         case 0:
1530         default:
1531                 return 1;
1532         case 1:
1533                 return 2;
1534         case 2:
1535                 return 4;
1536         case 3:
1537                 return 8;
1538         case 4:
1539                 return 3;
1540         case 5:
1541                 return 6;
1542         case 6:
1543                 return 10;
1544         case 7:
1545                 return 12;
1546         case 8:
1547                 return 16;
1548         }
1549 }
1550
/* Input parameters for the DCE6 display watermark calculations below;
 * filled in per-crtc by dce6_program_watermarks(). */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1566
1567 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1568 {
1569         /* Calculate raw DRAM Bandwidth */
1570         fixed20_12 dram_efficiency; /* 0.7 */
1571         fixed20_12 yclk, dram_channels, bandwidth;
1572         fixed20_12 a;
1573
1574         a.full = dfixed_const(1000);
1575         yclk.full = dfixed_const(wm->yclk);
1576         yclk.full = dfixed_div(yclk, a);
1577         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1578         a.full = dfixed_const(10);
1579         dram_efficiency.full = dfixed_const(7);
1580         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1581         bandwidth.full = dfixed_mul(dram_channels, yclk);
1582         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1583
1584         return dfixed_trunc(bandwidth);
1585 }
1586
1587 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1588 {
1589         /* Calculate DRAM Bandwidth and the part allocated to display. */
1590         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1591         fixed20_12 yclk, dram_channels, bandwidth;
1592         fixed20_12 a;
1593
1594         a.full = dfixed_const(1000);
1595         yclk.full = dfixed_const(wm->yclk);
1596         yclk.full = dfixed_div(yclk, a);
1597         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1598         a.full = dfixed_const(10);
1599         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1600         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1601         bandwidth.full = dfixed_mul(dram_channels, yclk);
1602         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1603
1604         return dfixed_trunc(bandwidth);
1605 }
1606
1607 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1608 {
1609         /* Calculate the display Data return Bandwidth */
1610         fixed20_12 return_efficiency; /* 0.8 */
1611         fixed20_12 sclk, bandwidth;
1612         fixed20_12 a;
1613
1614         a.full = dfixed_const(1000);
1615         sclk.full = dfixed_const(wm->sclk);
1616         sclk.full = dfixed_div(sclk, a);
1617         a.full = dfixed_const(10);
1618         return_efficiency.full = dfixed_const(8);
1619         return_efficiency.full = dfixed_div(return_efficiency, a);
1620         a.full = dfixed_const(32);
1621         bandwidth.full = dfixed_mul(a, sclk);
1622         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1623
1624         return dfixed_trunc(bandwidth);
1625 }
1626
1627 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1628 {
1629         return 32;
1630 }
1631
1632 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1633 {
1634         /* Calculate the DMIF Request Bandwidth */
1635         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1636         fixed20_12 disp_clk, sclk, bandwidth;
1637         fixed20_12 a, b1, b2;
1638         u32 min_bandwidth;
1639
1640         a.full = dfixed_const(1000);
1641         disp_clk.full = dfixed_const(wm->disp_clk);
1642         disp_clk.full = dfixed_div(disp_clk, a);
1643         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1644         b1.full = dfixed_mul(a, disp_clk);
1645
1646         a.full = dfixed_const(1000);
1647         sclk.full = dfixed_const(wm->sclk);
1648         sclk.full = dfixed_div(sclk, a);
1649         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1650         b2.full = dfixed_mul(a, sclk);
1651
1652         a.full = dfixed_const(10);
1653         disp_clk_request_efficiency.full = dfixed_const(8);
1654         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1655
1656         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1657
1658         a.full = dfixed_const(min_bandwidth);
1659         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1660
1661         return dfixed_trunc(bandwidth);
1662 }
1663
1664 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1665 {
1666         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1667         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1668         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1669         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1670
1671         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1672 }
1673
1674 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1675 {
1676         /* Calculate the display mode Average Bandwidth
1677          * DisplayMode should contain the source and destination dimensions,
1678          * timing, etc.
1679          */
1680         fixed20_12 bpp;
1681         fixed20_12 line_time;
1682         fixed20_12 src_width;
1683         fixed20_12 bandwidth;
1684         fixed20_12 a;
1685
1686         a.full = dfixed_const(1000);
1687         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1688         line_time.full = dfixed_div(line_time, a);
1689         bpp.full = dfixed_const(wm->bytes_per_pixel);
1690         src_width.full = dfixed_const(wm->src_width);
1691         bandwidth.full = dfixed_mul(src_width, bpp);
1692         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1693         bandwidth.full = dfixed_div(bandwidth, line_time);
1694
1695         return dfixed_trunc(bandwidth);
1696 }
1697
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the total latency (in ns) the line buffer must hide */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> nothing to hide */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling (vsc > 2), multi-tap scaling, or interlaced
	 * scaled modes need more source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the rate
	 * at which the DMIF buffer can be drained over the latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk(MHz) * bytes_per_pixel: limit from the display pipe */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the lowest of the limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one destination line's worth of source data
	 * at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills within the active period the base latency is
	 * sufficient; otherwise add the shortfall to the watermark */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1760
1761 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1762 {
1763         if (dce6_average_bandwidth(wm) <=
1764             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1765                 return true;
1766         else
1767                 return false;
1768 };
1769
1770 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1771 {
1772         if (dce6_average_bandwidth(wm) <=
1773             (dce6_available_bandwidth(wm) / wm->num_heads))
1774                 return true;
1775         else
1776                 return false;
1777 };
1778
1779 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1780 {
1781         u32 lb_partitions = wm->lb_size / wm->src_width;
1782         u32 line_time = wm->active_time + wm->blank_time;
1783         u32 latency_tolerant_lines;
1784         u32 latency_hiding;
1785         fixed20_12 a;
1786
1787         a.full = dfixed_const(1);
1788         if (wm->vsc.full > a.full)
1789                 latency_tolerant_lines = 1;
1790         else {
1791                 if (lb_partitions <= (wm->vtaps + 1))
1792                         latency_tolerant_lines = 1;
1793                 else
1794                         latency_tolerant_lines = 2;
1795         }
1796
1797         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1798
1799         if (dce6_latency_watermark(wm) <= latency_hiding)
1800                 return true;
1801         else
1802                 return false;
1803 }
1804
/* Compute and program the latency watermarks and priority marks for one
 * crtc.  lb_size is the line buffer space assigned to this crtc (from
 * dce6_line_buffer_adjust()); num_heads is the number of active crtcs. */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* line_time capped at 65535 to fit the register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* fill the watermark parameters from current clocks and mode */
		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A = watermark_a * pclk(MHz) * hsc / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same calculation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1914
1915 void dce6_bandwidth_update(struct radeon_device *rdev)
1916 {
1917         struct drm_display_mode *mode0 = NULL;
1918         struct drm_display_mode *mode1 = NULL;
1919         u32 num_heads = 0, lb_size;
1920         int i;
1921
1922         radeon_update_display_priority(rdev);
1923
1924         for (i = 0; i < rdev->num_crtc; i++) {
1925                 if (rdev->mode_info.crtcs[i]->base.enabled)
1926                         num_heads++;
1927         }
1928         for (i = 0; i < rdev->num_crtc; i += 2) {
1929                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1930                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1931                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1932                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1933                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1934                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1935         }
1936 }
1937
1938 /*
1939  * Core functions
1940  */
1941 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1942 {
1943         const u32 num_tile_mode_states = 32;
1944         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1945
1946         switch (rdev->config.si.mem_row_size_in_kb) {
1947         case 1:
1948                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1949                 break;
1950         case 2:
1951         default:
1952                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1953                 break;
1954         case 4:
1955                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1956                 break;
1957         }
1958
1959         if ((rdev->family == CHIP_TAHITI) ||
1960             (rdev->family == CHIP_PITCAIRN)) {
1961                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1962                         switch (reg_offset) {
1963                         case 0:  /* non-AA compressed depth or any compressed stencil */
1964                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1965                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1966                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1967                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1968                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1969                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1971                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1972                                 break;
1973                         case 1:  /* 2xAA/4xAA compressed depth only */
1974                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1975                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1976                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1977                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1978                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1979                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1980                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1981                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1982                                 break;
1983                         case 2:  /* 8xAA compressed depth only */
1984                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1985                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1986                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1987                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1988                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1989                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1991                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1992                                 break;
1993                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1994                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1996                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1997                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1998                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1999                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2000                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2001                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2002                                 break;
2003                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2004                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2005                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2006                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2007                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2008                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2009                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2012                                 break;
2013                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2014                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2015                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2016                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2017                                                  TILE_SPLIT(split_equal_to_row_size) |
2018                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2019                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2020                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2021                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2022                                 break;
2023                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size) |
2028                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2029                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2030                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2031                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2032                                 break;
2033                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2037                                                  TILE_SPLIT(split_equal_to_row_size) |
2038                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2039                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2040                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2041                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2042                                 break;
2043                         case 8:  /* 1D and 1D Array Surfaces */
2044                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2045                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2047                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2048                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2049                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2050                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2051                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2052                                 break;
2053                         case 9:  /* Displayable maps. */
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2057                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2058                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2059                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2060                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2061                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2062                                 break;
2063                         case 10:  /* Display 8bpp. */
2064                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2067                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2068                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2069                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2070                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2071                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2072                                 break;
2073                         case 11:  /* Display 16bpp. */
2074                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2077                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2078                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2079                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2080                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2081                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2082                                 break;
2083                         case 12:  /* Display 32bpp. */
2084                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2086                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2087                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2088                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2089                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2090                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2091                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2092                                 break;
2093                         case 13:  /* Thin. */
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2096                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2097                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2098                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2099                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2102                                 break;
2103                         case 14:  /* Thin 8 bpp. */
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2107                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2108                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2109                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2110                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2111                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2112                                 break;
2113                         case 15:  /* Thin 16 bpp. */
2114                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2116                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2117                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2118                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2119                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2120                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2121                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2122                                 break;
2123                         case 16:  /* Thin 32 bpp. */
2124                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2126                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2127                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2129                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2132                                 break;
2133                         case 17:  /* Thin 64 bpp. */
2134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2136                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2137                                                  TILE_SPLIT(split_equal_to_row_size) |
2138                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2139                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2140                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2141                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2142                                 break;
2143                         case 21:  /* 8 bpp PRT. */
2144                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2146                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2147                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2148                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2149                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2152                                 break;
2153                         case 22:  /* 16 bpp PRT */
2154                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2156                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2157                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2158                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2159                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2162                                 break;
2163                         case 23:  /* 32 bpp PRT */
2164                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2166                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2167                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2168                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2169                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2171                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2172                                 break;
2173                         case 24:  /* 64 bpp PRT */
2174                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2175                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2176                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2177                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2178                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2179                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2182                                 break;
2183                         case 25:  /* 128 bpp PRT */
2184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2186                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2187                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2189                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2192                                 break;
2193                         default:
2194                                 gb_tile_moden = 0;
2195                                 break;
2196                         }
2197                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2198                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2199                 }
2200         } else if ((rdev->family == CHIP_VERDE) ||
2201                    (rdev->family == CHIP_OLAND) ||
2202                    (rdev->family == CHIP_HAINAN)) {
2203                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2204                         switch (reg_offset) {
2205                         case 0:  /* non-AA compressed depth or any compressed stencil */
2206                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2208                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2209                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2210                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2211                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2213                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2214                                 break;
2215                         case 1:  /* 2xAA/4xAA compressed depth only */
2216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2218                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2219                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2220                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2221                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2222                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2224                                 break;
2225                         case 2:  /* 8xAA compressed depth only */
2226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2227                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2228                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2229                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2230                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2231                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2234                                 break;
2235                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2240                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2241                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2244                                 break;
2245                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2246                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2248                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2249                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2250                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2251                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2254                                 break;
2255                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2256                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2258                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2259                                                  TILE_SPLIT(split_equal_to_row_size) |
2260                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2261                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2264                                 break;
2265                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2268                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2269                                                  TILE_SPLIT(split_equal_to_row_size) |
2270                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2271                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2274                                 break;
2275                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2276                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2278                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2279                                                  TILE_SPLIT(split_equal_to_row_size) |
2280                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2281                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2284                                 break;
2285                         case 8:  /* 1D and 1D Array Surfaces */
2286                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2287                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2288                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2289                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2290                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2291                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2294                                 break;
2295                         case 9:  /* Displayable maps. */
2296                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2299                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2301                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2303                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2304                                 break;
2305                         case 10:  /* Display 8bpp. */
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2309                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2310                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2311                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2314                                 break;
2315                         case 11:  /* Display 16bpp. */
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2319                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2320                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2321                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2324                                 break;
2325                         case 12:  /* Display 32bpp. */
2326                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2329                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2331                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2333                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2334                                 break;
2335                         case 13:  /* Thin. */
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2340                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2341                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2343                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2344                                 break;
2345                         case 14:  /* Thin 8 bpp. */
2346                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2348                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2349                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2350                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2351                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2354                                 break;
2355                         case 15:  /* Thin 16 bpp. */
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2359                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2360                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2361                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2364                                 break;
2365                         case 16:  /* Thin 32 bpp. */
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2368                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2369                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2370                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2371                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2374                                 break;
2375                         case 17:  /* Thin 64 bpp. */
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2377                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2378                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2379                                                  TILE_SPLIT(split_equal_to_row_size) |
2380                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2381                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2383                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2384                                 break;
2385                         case 21:  /* 8 bpp PRT. */
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2390                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2391                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2394                                 break;
2395                         case 22:  /* 16 bpp PRT */
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2398                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2399                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2401                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2403                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2404                                 break;
2405                         case 23:  /* 32 bpp PRT */
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2409                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2411                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2413                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2414                                 break;
2415                         case 24:  /* 64 bpp PRT */
2416                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2418                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2419                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2420                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2421                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2424                                 break;
2425                         case 25:  /* 128 bpp PRT */
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2428                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2429                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2430                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2431                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2434                                 break;
2435                         default:
2436                                 gb_tile_moden = 0;
2437                                 break;
2438                         }
2439                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2440                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441                 }
2442         } else
2443                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2444 }
2445
2446 static void si_select_se_sh(struct radeon_device *rdev,
2447                             u32 se_num, u32 sh_num)
2448 {
2449         u32 data = INSTANCE_BROADCAST_WRITES;
2450
2451         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2452                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2453         else if (se_num == 0xffffffff)
2454                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2455         else if (sh_num == 0xffffffff)
2456                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2457         else
2458                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2459         WREG32(GRBM_GFX_INDEX, data);
2460 }
2461
2462 static u32 si_create_bitmask(u32 bit_width)
2463 {
2464         u32 i, mask = 0;
2465
2466         for (i = 0; i < bit_width; i++) {
2467                 mask <<= 1;
2468                 mask |= 1;
2469         }
2470         return mask;
2471 }
2472
2473 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2474 {
2475         u32 data, mask;
2476
2477         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2478         if (data & 1)
2479                 data &= INACTIVE_CUS_MASK;
2480         else
2481                 data = 0;
2482         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2483
2484         data >>= INACTIVE_CUS_SHIFT;
2485
2486         mask = si_create_bitmask(cu_per_sh);
2487
2488         return ~data & mask;
2489 }
2490
2491 static void si_setup_spi(struct radeon_device *rdev,
2492                          u32 se_num, u32 sh_per_se,
2493                          u32 cu_per_sh)
2494 {
2495         int i, j, k;
2496         u32 data, mask, active_cu;
2497
2498         for (i = 0; i < se_num; i++) {
2499                 for (j = 0; j < sh_per_se; j++) {
2500                         si_select_se_sh(rdev, i, j);
2501                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2502                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2503
2504                         mask = 1;
2505                         for (k = 0; k < 16; k++) {
2506                                 mask <<= k;
2507                                 if (active_cu & mask) {
2508                                         data &= ~mask;
2509                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2510                                         break;
2511                                 }
2512                         }
2513                 }
2514         }
2515         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2516 }
2517
2518 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2519                               u32 max_rb_num, u32 se_num,
2520                               u32 sh_per_se)
2521 {
2522         u32 data, mask;
2523
2524         data = RREG32(CC_RB_BACKEND_DISABLE);
2525         if (data & 1)
2526                 data &= BACKEND_DISABLE_MASK;
2527         else
2528                 data = 0;
2529         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2530
2531         data >>= BACKEND_DISABLE_SHIFT;
2532
2533         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2534
2535         return data & mask;
2536 }
2537
/*
 * si_setup_rb - program the raster configuration for the render backends
 *
 * Walks every shader array to collect the disabled-RB bits into one
 * packed bitmap, inverts it into an enabled-RB bitmap, and then writes a
 * PA_SC_RASTER_CONFIG value per shader engine mapping rasterizer output
 * onto the RBs that are actually present.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pack each SH's disable bits into one bitmap, allotting
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: a set bit in enabled_rbs means the RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Per SE, consume two enabled-RB bits per shader array and pick
	 * the matching RB_MAP packer routing for PA_SC_RASTER_CONFIG. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:  /* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:  /* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:  /* both enabled (also the fallback) */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Restore broadcast so later register writes reach all SEs/SHs. */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2585
/* si_gpu_init - one-time graphics engine initialization for SI ASICs.
 *
 * Fills rdev->config.si with per-ASIC limits, derives the tiling/address
 * configuration from the memory controller settings, programs the golden
 * GB_ADDR_CONFIG into all consumers (display, HDP, DMA, UVD), sets up the
 * render backends and SPI, and writes the 3D-engine power-on defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = 0;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 sx_debug_1;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* per-ASIC shader/tiling limits (values from AMD hardware docs) */
        switch (rdev->family) {
        case CHIP_TAHITI:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 12;
                rdev->config.si.max_cu_per_sh = 8;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 12;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_PITCAIRN:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 8;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 8;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_VERDE:
        default:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_OLAND:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 6;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 2;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAINAN:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 1;
                rdev->config.si.max_texture_channel_caches = 2;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP: clear 32 register tuples, 0x18 bytes apart */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        /* cap PCIe max read request size to avoid hangs on some platforms */
        evergreen_fix_pci_max_read_req_size(rdev);

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
        rdev->config.si.mem_max_burst_length_bytes = 256;
        /* derive DRAM row size (in KB) from the MC column count */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.si.mem_row_size_in_kb > 4)
                rdev->config.si.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.si.shader_engine_tile_size = 32;
        rdev->config.si.num_gpus = 1;
        rdev->config.si.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.si.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.si.tile_config = 0;
        switch (rdev->config.si.num_tile_pipes) {
        case 1:
                rdev->config.si.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.si.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.si.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.si.tile_config |= (3 << 0);
                break;
        }
        switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
        case 0: /* four banks */
                rdev->config.si.tile_config |= 0 << 4;
                break;
        case 1: /* eight banks */
                rdev->config.si.tile_config |= 1 << 4;
                break;
        case 2: /* sixteen banks */
        default:
                rdev->config.si.tile_config |= 2 << 4;
                break;
        }
        rdev->config.si.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.si.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* every block that addresses memory must agree on the tiling config */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
        if (rdev->has_uvd) {
                WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
        }

        si_tiling_mode_table_init(rdev);

        si_setup_rb(rdev, rdev->config.si.max_shader_engines,
                    rdev->config.si.max_sh_per_se,
                    rdev->config.si.max_backends_per_se);

        si_setup_spi(rdev, rdev->config.si.max_shader_engines,
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);


        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
                                     ROQ_IB2_START(0x2b)));
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        /* read-modify-write with no change: keeps the reset value */
        sx_debug_1 = RREG32(SX_DEBUG_1);
        WREG32(SX_DEBUG_1, sx_debug_1);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* program the scan-converter FIFO sizes chosen per ASIC above */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* disable all CB performance counters */
        WREG32(CB_PERFCOUNTER0_SELECT0, 0);
        WREG32(CB_PERFCOUNTER0_SELECT1, 0);
        WREG32(CB_PERFCOUNTER1_SELECT0, 0);
        WREG32(CB_PERFCOUNTER1_SELECT1, 0);
        WREG32(CB_PERFCOUNTER2_SELECT0, 0);
        WREG32(CB_PERFCOUNTER2_SELECT1, 0);
        WREG32(CB_PERFCOUNTER3_SELECT0, 0);
        WREG32(CB_PERFCOUNTER3_SELECT1, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

        /* let the writes settle before the engine is used */
        udelay(50);
}
2842
2843 /*
2844  * GPU scratch registers helpers function.
2845  */
2846 static void si_scratch_init(struct radeon_device *rdev)
2847 {
2848         int i;
2849
2850         rdev->scratch.num_reg = 7;
2851         rdev->scratch.reg_base = SCRATCH_REG0;
2852         for (i = 0; i < rdev->scratch.num_reg; i++) {
2853                 rdev->scratch.free[i] = true;
2854                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2855         }
2856 }
2857
/* si_fence_ring_emit - emit a fence packet on a CP ring.
 * @rdev: radeon device
 * @fence: fence to emit (its ring index selects the CP ring)
 *
 * Flushes the GPU read caches over GART, then emits an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to the
 * fence's writeback address and raises an interrupt when it retires.
 * The dword order below is the packet format; do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
                        struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* flush read cache over gart */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF); /* CP_COHER_SIZE: whole address space */
        radeon_ring_write(ring, 0);          /* CP_COHER_BASE */
        radeon_ring_write(ring, 10); /* poll interval */
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
        radeon_ring_write(ring, addr & 0xffffffff);
        /* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
2884
2885 /*
2886  * IB stuff
2887  */
/* si_ring_ib_execute - schedule an indirect buffer on a CP ring.
 * @rdev: radeon device
 * @ib: the indirect buffer to execute (const IB or normal IB)
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and dispatched with
 * INDIRECT_BUFFER_CONST; normal IBs first record the post-IB read
 * pointer (via scratch register or writeback) and flush the read
 * caches for the IB's VM after dispatch.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 header;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* 3 dwords below + 4 for the IB dispatch + 8 for the flush */
                        next_rptr = ring->wptr + 3 + 4 + 8;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_CONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* 5 dwords below + 4 for the IB dispatch + 8 for the flush */
                        next_rptr = ring->wptr + 5 + 4 + 8;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, (1 << 8)); /* write confirm */
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* IB dispatch: header, address low/high, size + vmid */
        radeon_ring_write(ring, header);
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, ib->length_dw |
                          (ib->vm ? (ib->vm->id << 24) : 0));

        if (!ib->is_const_ib) {
                /* flush read cache over gart for this vmid */
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
                radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
                radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
                radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                                  PACKET3_TC_ACTION_ENA |
                                  PACKET3_SH_KCACHE_ACTION_ENA |
                                  PACKET3_SH_ICACHE_ACTION_ENA);
                radeon_ring_write(ring, 0xFFFFFFFF);
                radeon_ring_write(ring, 0);
                radeon_ring_write(ring, 10); /* poll interval */
        }
}
2944
2945 /*
2946  * CP.
2947  */
2948 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2949 {
2950         if (enable)
2951                 WREG32(CP_ME_CNTL, 0);
2952         else {
2953                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2954                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2955                 WREG32(SCRATCH_UMSK, 0);
2956                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2957                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2958                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2959         }
2960         udelay(50);
2961 }
2962
2963 static int si_cp_load_microcode(struct radeon_device *rdev)
2964 {
2965         const __be32 *fw_data;
2966         int i;
2967
2968         if (!rdev->me_fw || !rdev->pfp_fw)
2969                 return -EINVAL;
2970
2971         si_cp_enable(rdev, false);
2972
2973         /* PFP */
2974         fw_data = (const __be32 *)rdev->pfp_fw->data;
2975         WREG32(CP_PFP_UCODE_ADDR, 0);
2976         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2977                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2978         WREG32(CP_PFP_UCODE_ADDR, 0);
2979
2980         /* CE */
2981         fw_data = (const __be32 *)rdev->ce_fw->data;
2982         WREG32(CP_CE_UCODE_ADDR, 0);
2983         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2984                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2985         WREG32(CP_CE_UCODE_ADDR, 0);
2986
2987         /* ME */
2988         fw_data = (const __be32 *)rdev->me_fw->data;
2989         WREG32(CP_ME_RAM_WADDR, 0);
2990         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2991                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2992         WREG32(CP_ME_RAM_WADDR, 0);
2993
2994         WREG32(CP_PFP_UCODE_ADDR, 0);
2995         WREG32(CP_CE_UCODE_ADDR, 0);
2996         WREG32(CP_ME_RAM_WADDR, 0);
2997         WREG32(CP_ME_RAM_RADDR, 0);
2998         return 0;
2999 }
3000
3001 static int si_cp_start(struct radeon_device *rdev)
3002 {
3003         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3004         int r, i;
3005
3006         r = radeon_ring_lock(rdev, ring, 7 + 4);
3007         if (r) {
3008                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3009                 return r;
3010         }
3011         /* init the CP */
3012         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3013         radeon_ring_write(ring, 0x1);
3014         radeon_ring_write(ring, 0x0);
3015         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3016         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3017         radeon_ring_write(ring, 0);
3018         radeon_ring_write(ring, 0);
3019
3020         /* init the CE partitions */
3021         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3022         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3023         radeon_ring_write(ring, 0xc000);
3024         radeon_ring_write(ring, 0xe000);
3025         radeon_ring_unlock_commit(rdev, ring);
3026
3027         si_cp_enable(rdev, true);
3028
3029         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3030         if (r) {
3031                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3032                 return r;
3033         }
3034
3035         /* setup clear context state */
3036         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3037         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3038
3039         for (i = 0; i < si_default_size; i++)
3040                 radeon_ring_write(ring, si_default_state[i]);
3041
3042         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3043         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3044
3045         /* set clear context state */
3046         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3047         radeon_ring_write(ring, 0);
3048
3049         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3050         radeon_ring_write(ring, 0x00000316);
3051         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3052         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3053
3054         radeon_ring_unlock_commit(rdev, ring);
3055
3056         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3057                 ring = &rdev->ring[i];
3058                 r = radeon_ring_lock(rdev, ring, 2);
3059
3060                 /* clear the compute context state */
3061                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3062                 radeon_ring_write(ring, 0);
3063
3064                 radeon_ring_unlock_commit(rdev, ring);
3065         }
3066
3067         return 0;
3068 }
3069
3070 static void si_cp_fini(struct radeon_device *rdev)
3071 {
3072         struct radeon_ring *ring;
3073         si_cp_enable(rdev, false);
3074
3075         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3076         radeon_ring_fini(rdev, ring);
3077         radeon_scratch_free(rdev, ring->rptr_save_reg);
3078
3079         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3080         radeon_ring_fini(rdev, ring);
3081         radeon_scratch_free(rdev, ring->rptr_save_reg);
3082
3083         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3084         radeon_ring_fini(rdev, ring);
3085         radeon_scratch_free(rdev, ring->rptr_save_reg);
3086 }
3087
/* si_cp_resume - reset and (re)program the three CP ring buffers.
 * @rdev: radeon device
 *
 * Soft-resets the CP block, programs ring buffer size, base, read/write
 * pointers and writeback addresses for the gfx ring (RB0) and the two
 * compute rings (RB1/RB2), then starts the CP and ring-tests each ring.
 *
 * Returns 0 on success; a gfx ring-test failure is fatal, compute ring
 * failures just leave that ring marked not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        int r;

        /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
        WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
                                 SOFT_RESET_PA |
                                 SOFT_RESET_VGT |
                                 SOFT_RESET_SPI |
                                 SOFT_RESET_SX));
        RREG32(GRBM_SOFT_RESET); /* posting read */
        mdelay(15);
        WREG32(GRBM_SOFT_RESET, 0);
        RREG32(GRBM_SOFT_RESET);

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        WREG32(CP_DEBUG, 0);
        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        if (rdev->wb.enabled)
                WREG32(SCRATCH_UMSK, 0xff);
        else {
                tmp |= RB_NO_UPDATE;
                WREG32(SCRATCH_UMSK, 0);
        }

        mdelay(1);
        /* clear RB_RPTR_WR_ENA again and latch the final control value */
        WREG32(CP_RB0_CNTL, tmp);

        WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB0_RPTR);

        /* ring1  - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB1_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB1_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

        mdelay(1);
        WREG32(CP_RB1_CNTL, tmp);

        WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB1_RPTR);

        /* ring2 - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB2_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB2_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

        mdelay(1);
        WREG32(CP_RB2_CNTL, tmp);

        WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB2_RPTR);

        /* start the rings */
        si_cp_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
        rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                /* gfx ring failure is fatal: mark everything not ready */
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
                return r;
        }
        /* compute ring failures are non-fatal */
        r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
        if (r) {
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
        }
        r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
        if (r) {
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
        }

        return 0;
}
3223
/* si_gpu_check_soft_reset - determine which GPU blocks appear hung.
 * @rdev: radeon device
 *
 * Decodes the GRBM/SRBM/DMA/VM status registers into a RADEON_RESET_*
 * bitmask of blocks that should be soft-reset. A busy MC is filtered
 * out because it is usually just busy rather than hung.
 */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
                   CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        if (tmp & GRBM_EE_BUSY)
                reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
                reset_mask |= RADEON_RESET_RLC;

        /* DMA_STATUS_REG 0 */
        tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* DMA_STATUS_REG 1 */
        tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & DMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & DMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* VM_L2_STATUS */
        tmp = RREG32(VM_L2_STATUS);
        if (tmp & L2_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
3304
/**
 * si_gpu_soft_reset - soft reset the blocks selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags selecting which blocks to reset
 *
 * Halts the CP, disables the ring buffers of any DMA engine being
 * reset, stops the MC, pulses the corresponding bits in
 * GRBM_SOFT_RESET/SRBM_SOFT_RESET, then restores the MC.  The order of
 * the register writes below follows the hardware reset sequence and
 * must not be changed.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the last VM protection fault for debugging */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0: disable its ring buffer before resetting it */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop the MC before touching the reset registers */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the GRBM reset bits, wait, then release them; the
	 * read-back after each write follows the driver-wide pattern
	 * (presumably to post the write — matches other radeon asics) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM-side blocks */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3429
3430 int si_asic_reset(struct radeon_device *rdev)
3431 {
3432         u32 reset_mask;
3433
3434         reset_mask = si_gpu_check_soft_reset(rdev);
3435
3436         if (reset_mask)
3437                 r600_set_bios_scratch_engine_hung(rdev, true);
3438
3439         si_gpu_soft_reset(rdev, reset_mask);
3440
3441         reset_mask = si_gpu_check_soft_reset(rdev);
3442
3443         if (!reset_mask)
3444                 r600_set_bios_scratch_engine_hung(rdev, false);
3445
3446         return 0;
3447 }
3448
3449 /**
3450  * si_gfx_is_lockup - Check if the GFX engine is locked up
3451  *
3452  * @rdev: radeon_device pointer
3453  * @ring: radeon_ring structure holding ring information
3454  *
3455  * Check if the GFX engine is locked up.
3456  * Returns true if the engine appears to be locked up, false if not.
3457  */
3458 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3459 {
3460         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3461
3462         if (!(reset_mask & (RADEON_RESET_GFX |
3463                             RADEON_RESET_COMPUTE |
3464                             RADEON_RESET_CP))) {
3465                 radeon_ring_lockup_update(ring);
3466                 return false;
3467         }
3468         /* force CP activities */
3469         radeon_ring_force_activity(rdev, ring);
3470         return radeon_ring_test_lockup(rdev, ring);
3471 }
3472
3473 /**
3474  * si_dma_is_lockup - Check if the DMA engine is locked up
3475  *
3476  * @rdev: radeon_device pointer
3477  * @ring: radeon_ring structure holding ring information
3478  *
3479  * Check if the async DMA engine is locked up.
3480  * Returns true if the engine appears to be locked up, false if not.
3481  */
3482 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3483 {
3484         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3485         u32 mask;
3486
3487         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3488                 mask = RADEON_RESET_DMA;
3489         else
3490                 mask = RADEON_RESET_DMA1;
3491
3492         if (!(reset_mask & mask)) {
3493                 radeon_ring_lockup_update(ring);
3494                 return false;
3495         }
3496         /* force ring activities */
3497         radeon_ring_force_activity(rdev, ring);
3498         return radeon_ring_test_lockup(rdev, ring);
3499 }
3500
/* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Zeroes a bank of HDP registers, stops the MC (and display, via
 * evergreen_mc_stop), programs the system aperture / FB location /
 * HDP nonsurface registers, disables the AGP aperture, then restarts
 * the MC.  The aperture registers are only written while the MC is
 * stopped and idle.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: zero five registers in each of 32 register
	 * sets spaced 0x18 bytes apart starting at 0x2c14 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration: system aperture covers VRAM, unmapped
	 * accesses are redirected to the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end>>24 in the high 16 bits, start>>24 in the low */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture: base 0 with TOP == BOT — presumably leaves the
	 * aperture empty/disabled, matching other radeon asics */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3552
3553 static void si_vram_gtt_location(struct radeon_device *rdev,
3554                                  struct radeon_mc *mc)
3555 {
3556         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3557                 /* leave room for at least 1024M GTT */
3558                 dev_warn(rdev->dev, "limiting VRAM\n");
3559                 mc->real_vram_size = 0xFFC0000000ULL;
3560                 mc->mc_vram_size = 0xFFC0000000ULL;
3561         }
3562         radeon_vram_location(rdev, &rdev->mc, 0);
3563         rdev->mc.gtt_base_align = 0;
3564         radeon_gtt_location(rdev, mc);
3565 }
3566
3567 static int si_mc_init(struct radeon_device *rdev)
3568 {
3569         u32 tmp;
3570         int chansize, numchan;
3571
3572         /* Get VRAM informations */
3573         rdev->mc.vram_is_ddr = true;
3574         tmp = RREG32(MC_ARB_RAMCFG);
3575         if (tmp & CHANSIZE_OVERRIDE) {
3576                 chansize = 16;
3577         } else if (tmp & CHANSIZE_MASK) {
3578                 chansize = 64;
3579         } else {
3580                 chansize = 32;
3581         }
3582         tmp = RREG32(MC_SHARED_CHMAP);
3583         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3584         case 0:
3585         default:
3586                 numchan = 1;
3587                 break;
3588         case 1:
3589                 numchan = 2;
3590                 break;
3591         case 2:
3592                 numchan = 4;
3593                 break;
3594         case 3:
3595                 numchan = 8;
3596                 break;
3597         case 4:
3598                 numchan = 3;
3599                 break;
3600         case 5:
3601                 numchan = 6;
3602                 break;
3603         case 6:
3604                 numchan = 10;
3605                 break;
3606         case 7:
3607                 numchan = 12;
3608                 break;
3609         case 8:
3610                 numchan = 16;
3611                 break;
3612         }
3613         rdev->mc.vram_width = numchan * chansize;
3614         /* Could aper size report 0 ? */
3615         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3616         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3617         /* size in MB on si */
3618         tmp = RREG32(CONFIG_MEMSIZE);
3619         /* some boards may have garbage in the upper 16 bits */
3620         if (tmp & 0xffff0000) {
3621                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
3622                 if (tmp & 0xffff)
3623                         tmp &= 0xffff;
3624         }
3625         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
3626         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
3627         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3628         si_vram_gtt_location(rdev, &rdev->mc);
3629         radeon_update_bandwidth_info(rdev);
3630
3631         return 0;
3632 }
3633
3634 /*
3635  * GART
3636  */
/**
 * si_pcie_gart_tlb_flush - flush the TLB for the GART (VM context 0)
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so pending CPU writes reach memory,
 * then requests invalidation of VM context 0.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15; writing 1 invalidates context 0 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3645
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, points VM context 0 at the GART aperture and contexts
 * 1-15 at placeholder page tables, enables all contexts, and flushes
 * the TLB.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0: depth-0 page table covering the GART range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): these registers are cleared without a named
	 * #define in sid.h; purpose not documented here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 1-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 with full protection-fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3730
/**
 * si_pcie_gart_disable - disable VM page table translation
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, puts the L1 TLB in pass-through mode
 * (ENABLE_L1_TLB cleared), turns the L2 cache off, and unpins the
 * GART page table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: note ENABLE_L1_TLB is deliberately absent */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: same as enable but without ENABLE_L2_CACHE */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3749
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables translation first, then frees the page table VRAM and the
 * GART bookkeeping; the order matters.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3756
3757 /* vm parser */
3758 static bool si_vm_reg_valid(u32 reg)
3759 {
3760         /* context regs are fine */
3761         if (reg >= 0x28000)
3762                 return true;
3763
3764         /* check config regs */
3765         switch (reg) {
3766         case GRBM_GFX_INDEX:
3767         case CP_STRMOUT_CNTL:
3768         case VGT_VTX_VECT_EJECT_REG:
3769         case VGT_CACHE_INVALIDATION:
3770         case VGT_ESGS_RING_SIZE:
3771         case VGT_GSVS_RING_SIZE:
3772         case VGT_GS_VERTEX_REUSE:
3773         case VGT_PRIMITIVE_TYPE:
3774         case VGT_INDEX_TYPE:
3775         case VGT_NUM_INDICES:
3776         case VGT_NUM_INSTANCES:
3777         case VGT_TF_RING_SIZE:
3778         case VGT_HS_OFFCHIP_PARAM:
3779         case VGT_TF_MEMORY_BASE:
3780         case PA_CL_ENHANCE:
3781         case PA_SU_LINE_STIPPLE_VALUE:
3782         case PA_SC_LINE_STIPPLE_STATE:
3783         case PA_SC_ENHANCE:
3784         case SQC_CACHES:
3785         case SPI_STATIC_THREAD_MGMT_1:
3786         case SPI_STATIC_THREAD_MGMT_2:
3787         case SPI_STATIC_THREAD_MGMT_3:
3788         case SPI_PS_MAX_WAVE_ID:
3789         case SPI_CONFIG_CNTL:
3790         case SPI_CONFIG_CNTL_1:
3791         case TA_CNTL_AUX:
3792                 return true;
3793         default:
3794                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3795                 return false;
3796         }
3797 }
3798
3799 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3800                                   u32 *ib, struct radeon_cs_packet *pkt)
3801 {
3802         switch (pkt->opcode) {
3803         case PACKET3_NOP:
3804         case PACKET3_SET_BASE:
3805         case PACKET3_SET_CE_DE_COUNTERS:
3806         case PACKET3_LOAD_CONST_RAM:
3807         case PACKET3_WRITE_CONST_RAM:
3808         case PACKET3_WRITE_CONST_RAM_OFFSET:
3809         case PACKET3_DUMP_CONST_RAM:
3810         case PACKET3_INCREMENT_CE_COUNTER:
3811         case PACKET3_WAIT_ON_DE_COUNTER:
3812         case PACKET3_CE_WRITE:
3813                 break;
3814         default:
3815                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3816                 return -EINVAL;
3817         }
3818         return 0;
3819 }
3820
3821 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
3822 {
3823         u32 start_reg, reg, i;
3824         u32 command = ib[idx + 4];
3825         u32 info = ib[idx + 1];
3826         u32 idx_value = ib[idx];
3827         if (command & PACKET3_CP_DMA_CMD_SAS) {
3828                 /* src address space is register */
3829                 if (((info & 0x60000000) >> 29) == 0) {
3830                         start_reg = idx_value << 2;
3831                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
3832                                 reg = start_reg;
3833                                 if (!si_vm_reg_valid(reg)) {
3834                                         DRM_ERROR("CP DMA Bad SRC register\n");
3835                                         return -EINVAL;
3836                                 }
3837                         } else {
3838                                 for (i = 0; i < (command & 0x1fffff); i++) {
3839                                         reg = start_reg + (4 * i);
3840                                         if (!si_vm_reg_valid(reg)) {
3841                                                 DRM_ERROR("CP DMA Bad SRC register\n");
3842                                                 return -EINVAL;
3843                                         }
3844                                 }
3845                         }
3846                 }
3847         }
3848         if (command & PACKET3_CP_DMA_CMD_DAS) {
3849                 /* dst address space is register */
3850                 if (((info & 0x00300000) >> 20) == 0) {
3851                         start_reg = ib[idx + 2];
3852                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
3853                                 reg = start_reg;
3854                                 if (!si_vm_reg_valid(reg)) {
3855                                         DRM_ERROR("CP DMA Bad DST register\n");
3856                                         return -EINVAL;
3857                                 }
3858                         } else {
3859                                 for (i = 0; i < (command & 0x1fffff); i++) {
3860                                         reg = start_reg + (4 * i);
3861                                 if (!si_vm_reg_valid(reg)) {
3862                                                 DRM_ERROR("CP DMA Bad DST register\n");
3863                                                 return -EINVAL;
3864                                         }
3865                                 }
3866                         }
3867                 }
3868         }
3869         return 0;
3870 }
3871
/**
 * si_vm_packet3_gfx_check - validate a packet3 for the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: the IB dword array
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Whitelist check for packets submitted to the gfx ring from a VM IB.
 * Packets that can write registers have their target register(s)
 * validated with si_vm_reg_valid(); CP DMA is delegated to
 * si_vm_packet3_cp_dma_check().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes are allowed as-is, no payload inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* (idx_value & 0xf00) == 0: destination appears to be a
		 * register — validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16 set: single register; else one per dword */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the window, then each register in it */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3989
/**
 * si_vm_packet3_compute_check - validate a packet3 for a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: the IB dword array
 * @pkt: decoded packet header (idx/opcode/count)
 *
 * Same structure as si_vm_packet3_gfx_check() but with the smaller
 * opcode whitelist for the compute (CP1/CP2) rings: no draw packets
 * and no SET_CONFIG_REG.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* allowed without payload inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* (idx_value & 0xf00) == 0: destination appears to be a
		 * register — validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16 set: single register; else one per dword */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4077
4078 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4079 {
4080         int ret = 0;
4081         u32 idx = 0;
4082         struct radeon_cs_packet pkt;
4083
4084         do {
4085                 pkt.idx = idx;
4086                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4087                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4088                 pkt.one_reg_wr = 0;
4089                 switch (pkt.type) {
4090                 case RADEON_PACKET_TYPE0:
4091                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4092                         ret = -EINVAL;
4093                         break;
4094                 case RADEON_PACKET_TYPE2:
4095                         idx += 1;
4096                         break;
4097                 case RADEON_PACKET_TYPE3:
4098                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4099                         if (ib->is_const_ib)
4100                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4101                         else {
4102                                 switch (ib->ring) {
4103                                 case RADEON_RING_TYPE_GFX_INDEX:
4104                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4105                                         break;
4106                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4107                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4108                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4109                                         break;
4110                                 default:
4111                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4112                                         ret = -EINVAL;
4113                                         break;
4114                                 }
4115                         }
4116                         idx += pkt.count + 2;
4117                         break;
4118                 default:
4119                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4120                         ret = -EINVAL;
4121                         break;
4122                 }
4123                 if (ret)
4124                         break;
4125         } while (idx < ib->length_dw);
4126
4127         return ret;
4128 }
4129
4130 /*
4131  * vm
4132  */
4133 int si_vm_init(struct radeon_device *rdev)
4134 {
4135         /* number of VMs */
4136         rdev->vm_manager.nvm = 16;
4137         /* base offset of vram pages */
4138         rdev->vm_manager.vram_base_offset = 0;
4139
4140         return 0;
4141 }
4142
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4146
/**
 * si_vm_set_page - update the page tables using the CP
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).  Each 64-bit PTE is
 * emitted as two dwords.  Depending on which ring services page-table
 * updates, the entries are written either with PM4 WRITE_DATA packets
 * (GFX ring) or with DMA write / PTE_PDE packets (DMA ring).
 */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: WRITE_DATA packets; payload is a 2-dword
		 * destination header plus 2 dwords per PTE. */
		while (count) {
			ndw = 2 + count * 2;
			/* clamp to the maximum WRITE_DATA payload; remaining
			 * entries get another packet on the next iteration */
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
					WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system pages: translate through the GART
					 * and keep only the page-aligned address bits */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				/* clamp to max DMA write packet size */
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			/* contiguous pages: the PTE_PDE packet generates the
			 * entries in hardware from base value + increment */
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* NOTE(review): pads the IB to an 8-dword boundary with NOPs —
		 * presumably a DMA engine alignment requirement; confirm */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
4252
/**
 * si_vm_flush - flush the TLB for a VM via the CP ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM whose page-table base changed (NULL is a no-op)
 *
 * Emits WRITE_DATA packets that update the per-VM page directory base
 * register, flush the HDP cache, and request a TLB invalidate for this
 * VM id, followed by a PFP/ME sync.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4295
/**
 * si_dma_vm_flush - flush the TLB for a VM via the DMA ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: VM whose page-table base changed (NULL is a no-op)
 *
 * Same sequence as si_vm_flush() but expressed as SRBM_WRITE DMA
 * packets: update the page directory base, flush the HDP cache and
 * invalidate the TLB for this VM id.
 * NOTE(review): the (0xf << 16) field looks like a byte-enable mask
 * for the SRBM write — confirm against the SI DMA packet spec.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4321
4322 /*
4323  * RLC
4324  */
4325 void si_rlc_fini(struct radeon_device *rdev)
4326 {
4327         int r;
4328
4329         /* save restore block */
4330         if (rdev->rlc.save_restore_obj) {
4331                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4332                 if (unlikely(r != 0))
4333                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4334                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4335                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4336
4337                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4338                 rdev->rlc.save_restore_obj = NULL;
4339         }
4340
4341         /* clear state block */
4342         if (rdev->rlc.clear_state_obj) {
4343                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4344                 if (unlikely(r != 0))
4345                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4346                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4347                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4348
4349                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4350                 rdev->rlc.clear_state_obj = NULL;
4351         }
4352 }
4353
/**
 * si_rlc_init - allocate and pin the RLC buffer objects (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Creates (if needed) and pins one GPU page of VRAM for the RLC
 * save/restore block and one for the clear-state block, recording the
 * pinned GPU addresses.  On any failure everything already acquired is
 * torn down via si_rlc_fini().  Returns 0 on success, negative error
 * code on failure.
 */
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			/* nothing allocated yet, so no cleanup needed */
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			/* release the already-pinned save/restore block */
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
4410
/* si_rlc_stop - halt the RLC by clearing its control register */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4415
/* si_rlc_start - (re)start the RLC by setting its enable bit */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4420
/**
 * si_rlc_resume - program and start the RLC (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Stops the RLC, resets its list/load-balancer registers, points it at
 * the save/restore and clear-state buffers, uploads the RLC microcode
 * and starts it again.  Returns 0 on success, -EINVAL if the RLC
 * firmware has not been loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* must be halted while we reprogram it */
	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* buffer addresses are written >> 8 — presumably 256-byte
	 * aligned register fields; TODO confirm against register spec */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* firmware image is big-endian; upload one dword at a time
	 * through the ADDR/DATA register pair */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4454
4455 static void si_enable_interrupts(struct radeon_device *rdev)
4456 {
4457         u32 ih_cntl = RREG32(IH_CNTL);
4458         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4459
4460         ih_cntl |= ENABLE_INTR;
4461         ih_rb_cntl |= IH_RB_ENABLE;
4462         WREG32(IH_CNTL, ih_cntl);
4463         WREG32(IH_RB_CNTL, ih_rb_cntl);
4464         rdev->ih.enabled = true;
4465 }
4466
4467 static void si_disable_interrupts(struct radeon_device *rdev)
4468 {
4469         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4470         u32 ih_cntl = RREG32(IH_CNTL);
4471
4472         ih_rb_cntl &= ~IH_RB_ENABLE;
4473         ih_cntl &= ~ENABLE_INTR;
4474         WREG32(IH_RB_CNTL, ih_rb_cntl);
4475         WREG32(IH_CNTL, ih_cntl);
4476         /* set rptr, wptr to 0 */
4477         WREG32(IH_RB_RPTR, 0);
4478         WREG32(IH_RB_WPTR, 0);
4479         rdev->ih.enabled = false;
4480         rdev->ih.rptr = 0;
4481 }
4482
/**
 * si_disable_interrupt_state - program every interrupt source off (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Writes the disabled/default value into all interrupt enable
 * registers: the three CP rings, both DMA engines, GRBM, each
 * populated CRTC's vblank and pageflip sources, and (on ASICs with a
 * display block) the DAC autodetect and HPD pins.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear the trap-enable bit on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline masks for each populated crtc pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt controls */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* keep only the polarity bit; this clears the HPD enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4538
/**
 * si_irq_init - initialize the interrupt handler ring (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, loads the RLC microcode, programs the
 * interrupt controller registers (ring base/size, writeback address,
 * MSI rearm), forces all interrupt sources off, and finally enables
 * interrupt delivery.  Returns 0 on success, negative error on
 * failure.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the size in dwords */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4609
/**
 * si_irq_set - program interrupt enables from the software state (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Builds the enable masks for the CP rings, both DMA engines, each
 * CRTC's vblank source and the HPD pins from the driver's current
 * interrupt bookkeeping (rdev->irq) and writes them to the hardware.
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD state with the enable bits cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled if requested by either the vblank or pflip path */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write all the masks out to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4767
4768 static inline void si_irq_ack(struct radeon_device *rdev)
4769 {
4770         u32 tmp;
4771
4772         if (ASIC_IS_NODCE(rdev))
4773                 return;
4774
4775         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4776         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4777         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4778         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4779         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4780         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4781         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4782         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4783         if (rdev->num_crtc >= 4) {
4784                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4785                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4786         }
4787         if (rdev->num_crtc >= 6) {
4788                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4789                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4790         }
4791
4792         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4793                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4794         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4795                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4796         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4797                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4798         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4799                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4800         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4801                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4802         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4803                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4804
4805         if (rdev->num_crtc >= 4) {
4806                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4807                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4808                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4809                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4810                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4811                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4812                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4813                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4814                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4815                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4816                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4817                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4818         }
4819
4820         if (rdev->num_crtc >= 6) {
4821                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4822                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4823                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4824                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4825                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4826                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4827                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4828                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4829                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4830                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4831                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4832                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4833         }
4834
4835         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4836                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4837                 tmp |= DC_HPDx_INT_ACK;
4838                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4839         }
4840         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4841                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4842                 tmp |= DC_HPDx_INT_ACK;
4843                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4844         }
4845         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4846                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4847                 tmp |= DC_HPDx_INT_ACK;
4848                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4849         }
4850         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4851                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4852                 tmp |= DC_HPDx_INT_ACK;
4853                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4854         }
4855         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4856                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4857                 tmp |= DC_HPDx_INT_ACK;
4858                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4859         }
4860         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4861                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4862                 tmp |= DC_HPDx_INT_ACK;
4863                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4864         }
4865 }
4866
/**
 * si_irq_disable - disable interrupt generation (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupt delivery, waits briefly so any in-flight
 * interrupt can land, acks all pending display/HPD sources, and
 * finally programs the fully-disabled interrupt register state.
 * The ordering of the four steps is significant.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4875
/**
 * si_irq_suspend - disable interrupts for suspend (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupt processing and stops the RLC as part of
 * suspending the device.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4881
/**
 * si_irq_fini - tear down interrupt handling (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt processing (which also stops the RLC) and
 * then frees the IH (interrupt handler) ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4887
4888 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4889 {
4890         u32 wptr, tmp;
4891
4892         if (rdev->wb.enabled)
4893                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4894         else
4895                 wptr = RREG32(IH_RB_WPTR);
4896
4897         if (wptr & RB_OVERFLOW) {
4898                 /* When a ring buffer overflow happen start parsing interrupt
4899                  * from the last not overwritten vector (wptr + 16). Hopefully
4900                  * this should allow us to catchup.
4901                  */
4902                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4903                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4904                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4905                 tmp = RREG32(IH_RB_CNTL);
4906                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4907                 WREG32(IH_RB_CNTL, tmp);
4908         }
4909         return (wptr & rdev->ih.ptr_mask);
4910 }
4911
4912 /*        SI IV Ring
4913  * Each IV ring entry is 128 bits:
4914  * [7:0]    - interrupt source id
4915  * [31:8]   - reserved
4916  * [59:32]  - interrupt source data
4917  * [63:60]  - reserved
4918  * [71:64]  - RINGID
4919  * [79:72]  - VMID
4920  * [127:80] - reserved
4921  */
/**
 * si_irq_process - interrupt handler (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH (Interrupt Handler) ring from rptr to wptr and
 * dispatches each 128-bit vector by source id: display vblank/vline
 * (src_id 1-6), HPD hotplug (42), UVD (124), VM protection faults
 * (146/147), CP interrupts and EOP events (176-178, 181), and DMA
 * trap events (224, 244).  Hotplug work is deferred to a workqueue.
 *
 * Returns IRQ_HANDLED if any processing occurred, IRQ_NONE when the
 * IH is disabled, the device is shutting down, or another thread
 * already holds the IH lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! Each vector is 16 bytes; the
		 * ring is indexed as an array of le32 dwords. */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear the cached status bit so this vector
					 * is only handled once per ack */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: dump the fault address/status
			 * registers, then reset them so the next fault is
			 * captured. */
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* EOP vectors carry the originating ring in ring_id */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			/* second DMA engine (DMA1) */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
5232
5233 /**
5234  * si_copy_dma - copy pages using the DMA engine
5235  *
5236  * @rdev: radeon_device pointer
5237  * @src_offset: src GPU address
5238  * @dst_offset: dst GPU address
5239  * @num_gpu_pages: number of GPU pages to xfer
5240  * @fence: radeon fence object
5241  *
5242  * Copy GPU paging using the DMA engine (SI).
5243  * Used by the radeon ttm implementation to move pages if
5244  * registered as the asic copy callback.
5245  */
5246 int si_copy_dma(struct radeon_device *rdev,
5247                 uint64_t src_offset, uint64_t dst_offset,
5248                 unsigned num_gpu_pages,
5249                 struct radeon_fence **fence)
5250 {
5251         struct radeon_semaphore *sem = NULL;
5252         int ring_index = rdev->asic->copy.dma_ring_index;
5253         struct radeon_ring *ring = &rdev->ring[ring_index];
5254         u32 size_in_bytes, cur_size_in_bytes;
5255         int i, num_loops;
5256         int r = 0;
5257
5258         r = radeon_semaphore_create(rdev, &sem);
5259         if (r) {
5260                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5261                 return r;
5262         }
5263
5264         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5265         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5266         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5267         if (r) {
5268                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5269                 radeon_semaphore_free(rdev, &sem, NULL);
5270                 return r;
5271         }
5272
5273         if (radeon_fence_need_sync(*fence, ring->idx)) {
5274                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5275                                             ring->idx);
5276                 radeon_fence_note_sync(*fence, ring->idx);
5277         } else {
5278                 radeon_semaphore_free(rdev, &sem, NULL);
5279         }
5280
5281         for (i = 0; i < num_loops; i++) {
5282                 cur_size_in_bytes = size_in_bytes;
5283                 if (cur_size_in_bytes > 0xFFFFF)
5284                         cur_size_in_bytes = 0xFFFFF;
5285                 size_in_bytes -= cur_size_in_bytes;
5286                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5287                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5288                 radeon_ring_write(ring, src_offset & 0xffffffff);
5289                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5290                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5291                 src_offset += cur_size_in_bytes;
5292                 dst_offset += cur_size_in_bytes;
5293         }
5294
5295         r = radeon_fence_emit(rdev, fence, ring->idx);
5296         if (r) {
5297                 radeon_ring_unlock_undo(rdev, ring);
5298                 return r;
5299         }
5300
5301         radeon_ring_unlock_commit(rdev, ring);
5302         radeon_semaphore_free(rdev, &sem, *fence);
5303
5304         return r;
5305 }
5306
5307 /*
5308  * startup/shutdown callbacks
5309  */
5310 static int si_startup(struct radeon_device *rdev)
5311 {
5312         struct radeon_ring *ring;
5313         int r;
5314
5315         si_mc_program(rdev);
5316
5317         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5318             !rdev->rlc_fw || !rdev->mc_fw) {
5319                 r = si_init_microcode(rdev);
5320                 if (r) {
5321                         DRM_ERROR("Failed to load firmware!\n");
5322                         return r;
5323                 }
5324         }
5325
5326         r = si_mc_load_microcode(rdev);
5327         if (r) {
5328                 DRM_ERROR("Failed to load MC firmware!\n");
5329                 return r;
5330         }
5331
5332         r = r600_vram_scratch_init(rdev);
5333         if (r)
5334                 return r;
5335
5336         r = si_pcie_gart_enable(rdev);
5337         if (r)
5338                 return r;
5339         si_gpu_init(rdev);
5340
5341         /* allocate rlc buffers */
5342         r = si_rlc_init(rdev);
5343         if (r) {
5344                 DRM_ERROR("Failed to init rlc BOs!\n");
5345                 return r;
5346         }
5347
5348         /* allocate wb buffer */
5349         r = radeon_wb_init(rdev);
5350         if (r)
5351                 return r;
5352
5353         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5354         if (r) {
5355                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5356                 return r;
5357         }
5358
5359         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5360         if (r) {
5361                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5362                 return r;
5363         }
5364
5365         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5366         if (r) {
5367                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5368                 return r;
5369         }
5370
5371         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5372         if (r) {
5373                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5374                 return r;
5375         }
5376
5377         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5378         if (r) {
5379                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5380                 return r;
5381         }
5382
5383         if (rdev->has_uvd) {
5384                 r = rv770_uvd_resume(rdev);
5385                 if (!r) {
5386                         r = radeon_fence_driver_start_ring(rdev,
5387                                                            R600_RING_TYPE_UVD_INDEX);
5388                         if (r)
5389                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5390                 }
5391                 if (r)
5392                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5393         }
5394
5395         /* Enable IRQ */
5396         if (!rdev->irq.installed) {
5397                 r = radeon_irq_kms_init(rdev);
5398                 if (r)
5399                         return r;
5400         }
5401
5402         r = si_irq_init(rdev);
5403         if (r) {
5404                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5405                 radeon_irq_kms_fini(rdev);
5406                 return r;
5407         }
5408         si_irq_set(rdev);
5409
5410         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5411         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5412                              CP_RB0_RPTR, CP_RB0_WPTR,
5413                              0, 0xfffff, RADEON_CP_PACKET2);
5414         if (r)
5415                 return r;
5416
5417         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5418         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5419                              CP_RB1_RPTR, CP_RB1_WPTR,
5420                              0, 0xfffff, RADEON_CP_PACKET2);
5421         if (r)
5422                 return r;
5423
5424         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5425         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5426                              CP_RB2_RPTR, CP_RB2_WPTR,
5427                              0, 0xfffff, RADEON_CP_PACKET2);
5428         if (r)
5429                 return r;
5430
5431         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5432         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5433                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
5434                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
5435                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5436         if (r)
5437                 return r;
5438
5439         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5440         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5441                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
5442                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
5443                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5444         if (r)
5445                 return r;
5446
5447         r = si_cp_load_microcode(rdev);
5448         if (r)
5449                 return r;
5450         r = si_cp_resume(rdev);
5451         if (r)
5452                 return r;
5453
5454         r = cayman_dma_resume(rdev);
5455         if (r)
5456                 return r;
5457
5458         if (rdev->has_uvd) {
5459                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5460                 if (ring->ring_size) {
5461                         r = radeon_ring_init(rdev, ring, ring->ring_size,
5462                                              R600_WB_UVD_RPTR_OFFSET,
5463                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5464                                              0, 0xfffff, RADEON_CP_PACKET2);
5465                         if (!r)
5466                                 r = r600_uvd_init(rdev);
5467                         if (r)
5468                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5469                 }
5470         }
5471
5472         r = radeon_ib_pool_init(rdev);
5473         if (r) {
5474                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5475                 return r;
5476         }
5477
5478         r = radeon_vm_manager_init(rdev);
5479         if (r) {
5480                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5481                 return r;
5482         }
5483
5484         return 0;
5485 }
5486
5487 int si_resume(struct radeon_device *rdev)
5488 {
5489         int r;
5490
5491         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5492          * posting will perform necessary task to bring back GPU into good
5493          * shape.
5494          */
5495         /* post card */
5496         atom_asic_init(rdev->mode_info.atom_context);
5497
5498         /* init golden registers */
5499         si_init_golden_registers(rdev);
5500
5501         rdev->accel_working = true;
5502         r = si_startup(rdev);
5503         if (r) {
5504                 DRM_ERROR("si startup failed on resume\n");
5505                 rdev->accel_working = false;
5506                 return r;
5507         }
5508
5509         return r;
5510
5511 }
5512
/*
 * si_suspend - quiesce the GPU for suspend.
 *
 * Tears the engines down roughly in reverse startup order so that
 * nothing touches the hardware after it has been disabled.  Always
 * returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);	/* halt the command processor rings */
	cayman_dma_stop(rdev);		/* halt the async DMA engines */
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	/* writeback must be disabled before the GART backing it goes away */
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5527
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the ASIC-specific functions.  This should also allow
 * the removal of a number of callback functions, such as
 * vram_info.
 */
/*
 * si_init - one-time driver-load initialization for SI parts.
 *
 * Reads and validates the (Atom)BIOS, posts the card if necessary,
 * initializes clocks, the memory controller, the buffer manager and all
 * ring descriptors, then runs si_startup() to bring the hardware up.
 *
 * Returns 0 on success or a negative error code on fatal failures.
 * Note that an si_startup() failure is NOT fatal here: acceleration is
 * disabled (rdev->accel_working = false) but 0 is still returned so the
 * driver can load for modesetting-only operation.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* Set up ring descriptors; the ring buffers themselves are
	 * allocated later in the startup path.
	 */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD init failure is non-fatal: the UVD ring is simply not set up */
	if (rdev->has_uvd) {
		r = radeon_uvd_init(rdev);
		if (!r) {
			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
			ring->ring_obj = NULL;
			r600_ring_init(rdev, ring, 4096);
		}
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* startup failed: unwind acceleration state but keep the
		 * device alive for display-only use
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
5648
/*
 * si_fini - driver-unload teardown for SI parts.
 *
 * Releases everything acquired by si_init()/si_startup(), roughly in
 * reverse order of initialization: engines first, then the memory
 * managers, and finally the BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		/* stop the UVD block before freeing its resources */
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5672
5673 /**
5674  * si_get_gpu_clock_counter - return GPU clock counter snapshot
5675  *
5676  * @rdev: radeon_device pointer
5677  *
5678  * Fetches a GPU clock counter snapshot (SI).
5679  * Returns the 64 bit clock counter snapshot.
5680  */
5681 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5682 {
5683         uint64_t clock;
5684
5685         mutex_lock(&rdev->gpu_clock_mutex);
5686         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5687         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5688                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5689         mutex_unlock(&rdev->gpu_clock_mutex);
5690         return clock;
5691 }
5692
/*
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks.
 *
 * @rdev:  radeon_device pointer
 * @vclk:  requested UVD video clock (units per radeon_uvd_calc_upll_dividers
 *         convention -- presumably 10 kHz; confirm against callers)
 * @dclk:  requested UVD decode clock (same units as @vclk)
 *
 * Follows the documented UPLL programming sequence: bypass the PLL,
 * compute dividers, reset/program/settle, then switch VCLK/DCLK back
 * to the PLL outputs.  Passing vclk == 0 or dclk == 0 leaves the PLL
 * bypassed and asleep (power-down case) and returns 0.
 *
 * Returns 0 on success or a negative error code from divider
 * calculation / PLL control requests.  The register write order and
 * the mdelay() settle times are part of the hardware sequence -- do
 * not reorder.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects a VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}