Merge remote-tracking branch 'lsk/v3.10/topic/arm64-fvp' into linux-linaro-lsk
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
68
69 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
70 extern void r600_ih_ring_fini(struct radeon_device *rdev);
71 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
75 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
76 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
77
78 static const u32 tahiti_golden_rlc_registers[] =
79 {
80         0xc424, 0xffffffff, 0x00601005,
81         0xc47c, 0xffffffff, 0x10104040,
82         0xc488, 0xffffffff, 0x0100000a,
83         0xc314, 0xffffffff, 0x00000800,
84         0xc30c, 0xffffffff, 0x800000f4,
85         0xf4a8, 0xffffffff, 0x00000000
86 };
87
88 static const u32 tahiti_golden_registers[] =
89 {
90         0x9a10, 0x00010000, 0x00018208,
91         0x9830, 0xffffffff, 0x00000000,
92         0x9834, 0xf00fffff, 0x00000400,
93         0x9838, 0x0002021c, 0x00020200,
94         0xc78, 0x00000080, 0x00000000,
95         0xd030, 0x000300c0, 0x00800040,
96         0xd830, 0x000300c0, 0x00800040,
97         0x5bb0, 0x000000f0, 0x00000070,
98         0x5bc0, 0x00200000, 0x50100000,
99         0x7030, 0x31000311, 0x00000011,
100         0x277c, 0x00000003, 0x000007ff,
101         0x240c, 0x000007ff, 0x00000000,
102         0x8a14, 0xf000001f, 0x00000007,
103         0x8b24, 0xffffffff, 0x00ffffff,
104         0x8b10, 0x0000ff0f, 0x00000000,
105         0x28a4c, 0x07ffffff, 0x4e000000,
106         0x28350, 0x3f3f3fff, 0x2a00126a,
107         0x30, 0x000000ff, 0x0040,
108         0x34, 0x00000040, 0x00004040,
109         0x9100, 0x07ffffff, 0x03000000,
110         0x8e88, 0x01ff1f3f, 0x00000000,
111         0x8e84, 0x01ff1f3f, 0x00000000,
112         0x9060, 0x0000007f, 0x00000020,
113         0x9508, 0x00010000, 0x00010000,
114         0xac14, 0x00000200, 0x000002fb,
115         0xac10, 0xffffffff, 0x0000543b,
116         0xac0c, 0xffffffff, 0xa9210876,
117         0x88d0, 0xffffffff, 0x000fff40,
118         0x88d4, 0x0000001f, 0x00000010,
119         0x1410, 0x20000000, 0x20fffed8,
120         0x15c0, 0x000c0fc0, 0x000c0400
121 };
122
123 static const u32 tahiti_golden_registers2[] =
124 {
125         0xc64, 0x00000001, 0x00000001
126 };
127
128 static const u32 pitcairn_golden_rlc_registers[] =
129 {
130         0xc424, 0xffffffff, 0x00601004,
131         0xc47c, 0xffffffff, 0x10102020,
132         0xc488, 0xffffffff, 0x01000020,
133         0xc314, 0xffffffff, 0x00000800,
134         0xc30c, 0xffffffff, 0x800000a4
135 };
136
137 static const u32 pitcairn_golden_registers[] =
138 {
139         0x9a10, 0x00010000, 0x00018208,
140         0x9830, 0xffffffff, 0x00000000,
141         0x9834, 0xf00fffff, 0x00000400,
142         0x9838, 0x0002021c, 0x00020200,
143         0xc78, 0x00000080, 0x00000000,
144         0xd030, 0x000300c0, 0x00800040,
145         0xd830, 0x000300c0, 0x00800040,
146         0x5bb0, 0x000000f0, 0x00000070,
147         0x5bc0, 0x00200000, 0x50100000,
148         0x7030, 0x31000311, 0x00000011,
149         0x2ae4, 0x00073ffe, 0x000022a2,
150         0x240c, 0x000007ff, 0x00000000,
151         0x8a14, 0xf000001f, 0x00000007,
152         0x8b24, 0xffffffff, 0x00ffffff,
153         0x8b10, 0x0000ff0f, 0x00000000,
154         0x28a4c, 0x07ffffff, 0x4e000000,
155         0x28350, 0x3f3f3fff, 0x2a00126a,
156         0x30, 0x000000ff, 0x0040,
157         0x34, 0x00000040, 0x00004040,
158         0x9100, 0x07ffffff, 0x03000000,
159         0x9060, 0x0000007f, 0x00000020,
160         0x9508, 0x00010000, 0x00010000,
161         0xac14, 0x000003ff, 0x000000f7,
162         0xac10, 0xffffffff, 0x00000000,
163         0xac0c, 0xffffffff, 0x32761054,
164         0x88d4, 0x0000001f, 0x00000010,
165         0x15c0, 0x000c0fc0, 0x000c0400
166 };
167
168 static const u32 verde_golden_rlc_registers[] =
169 {
170         0xc424, 0xffffffff, 0x033f1005,
171         0xc47c, 0xffffffff, 0x10808020,
172         0xc488, 0xffffffff, 0x00800008,
173         0xc314, 0xffffffff, 0x00001000,
174         0xc30c, 0xffffffff, 0x80010014
175 };
176
177 static const u32 verde_golden_registers[] =
178 {
179         0x9a10, 0x00010000, 0x00018208,
180         0x9830, 0xffffffff, 0x00000000,
181         0x9834, 0xf00fffff, 0x00000400,
182         0x9838, 0x0002021c, 0x00020200,
183         0xc78, 0x00000080, 0x00000000,
184         0xd030, 0x000300c0, 0x00800040,
185         0xd030, 0x000300c0, 0x00800040,
186         0xd830, 0x000300c0, 0x00800040,
187         0xd830, 0x000300c0, 0x00800040,
188         0x5bb0, 0x000000f0, 0x00000070,
189         0x5bc0, 0x00200000, 0x50100000,
190         0x7030, 0x31000311, 0x00000011,
191         0x2ae4, 0x00073ffe, 0x000022a2,
192         0x2ae4, 0x00073ffe, 0x000022a2,
193         0x2ae4, 0x00073ffe, 0x000022a2,
194         0x240c, 0x000007ff, 0x00000000,
195         0x240c, 0x000007ff, 0x00000000,
196         0x240c, 0x000007ff, 0x00000000,
197         0x8a14, 0xf000001f, 0x00000007,
198         0x8a14, 0xf000001f, 0x00000007,
199         0x8a14, 0xf000001f, 0x00000007,
200         0x8b24, 0xffffffff, 0x00ffffff,
201         0x8b10, 0x0000ff0f, 0x00000000,
202         0x28a4c, 0x07ffffff, 0x4e000000,
203         0x28350, 0x3f3f3fff, 0x0000124a,
204         0x28350, 0x3f3f3fff, 0x0000124a,
205         0x28350, 0x3f3f3fff, 0x0000124a,
206         0x30, 0x000000ff, 0x0040,
207         0x34, 0x00000040, 0x00004040,
208         0x9100, 0x07ffffff, 0x03000000,
209         0x9100, 0x07ffffff, 0x03000000,
210         0x8e88, 0x01ff1f3f, 0x00000000,
211         0x8e88, 0x01ff1f3f, 0x00000000,
212         0x8e88, 0x01ff1f3f, 0x00000000,
213         0x8e84, 0x01ff1f3f, 0x00000000,
214         0x8e84, 0x01ff1f3f, 0x00000000,
215         0x8e84, 0x01ff1f3f, 0x00000000,
216         0x9060, 0x0000007f, 0x00000020,
217         0x9508, 0x00010000, 0x00010000,
218         0xac14, 0x000003ff, 0x00000003,
219         0xac14, 0x000003ff, 0x00000003,
220         0xac14, 0x000003ff, 0x00000003,
221         0xac10, 0xffffffff, 0x00000000,
222         0xac10, 0xffffffff, 0x00000000,
223         0xac10, 0xffffffff, 0x00000000,
224         0xac0c, 0xffffffff, 0x00001032,
225         0xac0c, 0xffffffff, 0x00001032,
226         0xac0c, 0xffffffff, 0x00001032,
227         0x88d4, 0x0000001f, 0x00000010,
228         0x88d4, 0x0000001f, 0x00000010,
229         0x88d4, 0x0000001f, 0x00000010,
230         0x15c0, 0x000c0fc0, 0x000c0400
231 };
232
233 static const u32 oland_golden_rlc_registers[] =
234 {
235         0xc424, 0xffffffff, 0x00601005,
236         0xc47c, 0xffffffff, 0x10104040,
237         0xc488, 0xffffffff, 0x0100000a,
238         0xc314, 0xffffffff, 0x00000800,
239         0xc30c, 0xffffffff, 0x800000f4
240 };
241
242 static const u32 oland_golden_registers[] =
243 {
244         0x9a10, 0x00010000, 0x00018208,
245         0x9830, 0xffffffff, 0x00000000,
246         0x9834, 0xf00fffff, 0x00000400,
247         0x9838, 0x0002021c, 0x00020200,
248         0xc78, 0x00000080, 0x00000000,
249         0xd030, 0x000300c0, 0x00800040,
250         0xd830, 0x000300c0, 0x00800040,
251         0x5bb0, 0x000000f0, 0x00000070,
252         0x5bc0, 0x00200000, 0x50100000,
253         0x7030, 0x31000311, 0x00000011,
254         0x2ae4, 0x00073ffe, 0x000022a2,
255         0x240c, 0x000007ff, 0x00000000,
256         0x8a14, 0xf000001f, 0x00000007,
257         0x8b24, 0xffffffff, 0x00ffffff,
258         0x8b10, 0x0000ff0f, 0x00000000,
259         0x28a4c, 0x07ffffff, 0x4e000000,
260         0x28350, 0x3f3f3fff, 0x00000082,
261         0x30, 0x000000ff, 0x0040,
262         0x34, 0x00000040, 0x00004040,
263         0x9100, 0x07ffffff, 0x03000000,
264         0x9060, 0x0000007f, 0x00000020,
265         0x9508, 0x00010000, 0x00010000,
266         0xac14, 0x000003ff, 0x000000f3,
267         0xac10, 0xffffffff, 0x00000000,
268         0xac0c, 0xffffffff, 0x00003210,
269         0x88d4, 0x0000001f, 0x00000010,
270         0x15c0, 0x000c0fc0, 0x000c0400
271 };
272
273 static const u32 hainan_golden_registers[] =
274 {
275         0x9a10, 0x00010000, 0x00018208,
276         0x9830, 0xffffffff, 0x00000000,
277         0x9834, 0xf00fffff, 0x00000400,
278         0x9838, 0x0002021c, 0x00020200,
279         0xd0c0, 0xff000fff, 0x00000100,
280         0xd030, 0x000300c0, 0x00800040,
281         0xd8c0, 0xff000fff, 0x00000100,
282         0xd830, 0x000300c0, 0x00800040,
283         0x2ae4, 0x00073ffe, 0x000022a2,
284         0x240c, 0x000007ff, 0x00000000,
285         0x8a14, 0xf000001f, 0x00000007,
286         0x8b24, 0xffffffff, 0x00ffffff,
287         0x8b10, 0x0000ff0f, 0x00000000,
288         0x28a4c, 0x07ffffff, 0x4e000000,
289         0x28350, 0x3f3f3fff, 0x00000000,
290         0x30, 0x000000ff, 0x0040,
291         0x34, 0x00000040, 0x00004040,
292         0x9100, 0x03e00000, 0x03600000,
293         0x9060, 0x0000007f, 0x00000020,
294         0x9508, 0x00010000, 0x00010000,
295         0xac14, 0x000003ff, 0x000000f1,
296         0xac10, 0xffffffff, 0x00000000,
297         0xac0c, 0xffffffff, 0x00003210,
298         0x88d4, 0x0000001f, 0x00000010,
299         0x15c0, 0x000c0fc0, 0x000c0400
300 };
301
302 static const u32 hainan_golden_registers2[] =
303 {
304         0x98f8, 0xffffffff, 0x02010001
305 };
306
307 static const u32 tahiti_mgcg_cgcg_init[] =
308 {
309         0xc400, 0xffffffff, 0xfffffffc,
310         0x802c, 0xffffffff, 0xe0000000,
311         0x9a60, 0xffffffff, 0x00000100,
312         0x92a4, 0xffffffff, 0x00000100,
313         0xc164, 0xffffffff, 0x00000100,
314         0x9774, 0xffffffff, 0x00000100,
315         0x8984, 0xffffffff, 0x06000100,
316         0x8a18, 0xffffffff, 0x00000100,
317         0x92a0, 0xffffffff, 0x00000100,
318         0xc380, 0xffffffff, 0x00000100,
319         0x8b28, 0xffffffff, 0x00000100,
320         0x9144, 0xffffffff, 0x00000100,
321         0x8d88, 0xffffffff, 0x00000100,
322         0x8d8c, 0xffffffff, 0x00000100,
323         0x9030, 0xffffffff, 0x00000100,
324         0x9034, 0xffffffff, 0x00000100,
325         0x9038, 0xffffffff, 0x00000100,
326         0x903c, 0xffffffff, 0x00000100,
327         0xad80, 0xffffffff, 0x00000100,
328         0xac54, 0xffffffff, 0x00000100,
329         0x897c, 0xffffffff, 0x06000100,
330         0x9868, 0xffffffff, 0x00000100,
331         0x9510, 0xffffffff, 0x00000100,
332         0xaf04, 0xffffffff, 0x00000100,
333         0xae04, 0xffffffff, 0x00000100,
334         0x949c, 0xffffffff, 0x00000100,
335         0x802c, 0xffffffff, 0xe0000000,
336         0x9160, 0xffffffff, 0x00010000,
337         0x9164, 0xffffffff, 0x00030002,
338         0x9168, 0xffffffff, 0x00040007,
339         0x916c, 0xffffffff, 0x00060005,
340         0x9170, 0xffffffff, 0x00090008,
341         0x9174, 0xffffffff, 0x00020001,
342         0x9178, 0xffffffff, 0x00040003,
343         0x917c, 0xffffffff, 0x00000007,
344         0x9180, 0xffffffff, 0x00060005,
345         0x9184, 0xffffffff, 0x00090008,
346         0x9188, 0xffffffff, 0x00030002,
347         0x918c, 0xffffffff, 0x00050004,
348         0x9190, 0xffffffff, 0x00000008,
349         0x9194, 0xffffffff, 0x00070006,
350         0x9198, 0xffffffff, 0x000a0009,
351         0x919c, 0xffffffff, 0x00040003,
352         0x91a0, 0xffffffff, 0x00060005,
353         0x91a4, 0xffffffff, 0x00000009,
354         0x91a8, 0xffffffff, 0x00080007,
355         0x91ac, 0xffffffff, 0x000b000a,
356         0x91b0, 0xffffffff, 0x00050004,
357         0x91b4, 0xffffffff, 0x00070006,
358         0x91b8, 0xffffffff, 0x0008000b,
359         0x91bc, 0xffffffff, 0x000a0009,
360         0x91c0, 0xffffffff, 0x000d000c,
361         0x91c4, 0xffffffff, 0x00060005,
362         0x91c8, 0xffffffff, 0x00080007,
363         0x91cc, 0xffffffff, 0x0000000b,
364         0x91d0, 0xffffffff, 0x000a0009,
365         0x91d4, 0xffffffff, 0x000d000c,
366         0x91d8, 0xffffffff, 0x00070006,
367         0x91dc, 0xffffffff, 0x00090008,
368         0x91e0, 0xffffffff, 0x0000000c,
369         0x91e4, 0xffffffff, 0x000b000a,
370         0x91e8, 0xffffffff, 0x000e000d,
371         0x91ec, 0xffffffff, 0x00080007,
372         0x91f0, 0xffffffff, 0x000a0009,
373         0x91f4, 0xffffffff, 0x0000000d,
374         0x91f8, 0xffffffff, 0x000c000b,
375         0x91fc, 0xffffffff, 0x000f000e,
376         0x9200, 0xffffffff, 0x00090008,
377         0x9204, 0xffffffff, 0x000b000a,
378         0x9208, 0xffffffff, 0x000c000f,
379         0x920c, 0xffffffff, 0x000e000d,
380         0x9210, 0xffffffff, 0x00110010,
381         0x9214, 0xffffffff, 0x000a0009,
382         0x9218, 0xffffffff, 0x000c000b,
383         0x921c, 0xffffffff, 0x0000000f,
384         0x9220, 0xffffffff, 0x000e000d,
385         0x9224, 0xffffffff, 0x00110010,
386         0x9228, 0xffffffff, 0x000b000a,
387         0x922c, 0xffffffff, 0x000d000c,
388         0x9230, 0xffffffff, 0x00000010,
389         0x9234, 0xffffffff, 0x000f000e,
390         0x9238, 0xffffffff, 0x00120011,
391         0x923c, 0xffffffff, 0x000c000b,
392         0x9240, 0xffffffff, 0x000e000d,
393         0x9244, 0xffffffff, 0x00000011,
394         0x9248, 0xffffffff, 0x0010000f,
395         0x924c, 0xffffffff, 0x00130012,
396         0x9250, 0xffffffff, 0x000d000c,
397         0x9254, 0xffffffff, 0x000f000e,
398         0x9258, 0xffffffff, 0x00100013,
399         0x925c, 0xffffffff, 0x00120011,
400         0x9260, 0xffffffff, 0x00150014,
401         0x9264, 0xffffffff, 0x000e000d,
402         0x9268, 0xffffffff, 0x0010000f,
403         0x926c, 0xffffffff, 0x00000013,
404         0x9270, 0xffffffff, 0x00120011,
405         0x9274, 0xffffffff, 0x00150014,
406         0x9278, 0xffffffff, 0x000f000e,
407         0x927c, 0xffffffff, 0x00110010,
408         0x9280, 0xffffffff, 0x00000014,
409         0x9284, 0xffffffff, 0x00130012,
410         0x9288, 0xffffffff, 0x00160015,
411         0x928c, 0xffffffff, 0x0010000f,
412         0x9290, 0xffffffff, 0x00120011,
413         0x9294, 0xffffffff, 0x00000015,
414         0x9298, 0xffffffff, 0x00140013,
415         0x929c, 0xffffffff, 0x00170016,
416         0x9150, 0xffffffff, 0x96940200,
417         0x8708, 0xffffffff, 0x00900100,
418         0xc478, 0xffffffff, 0x00000080,
419         0xc404, 0xffffffff, 0x0020003f,
420         0x30, 0xffffffff, 0x0000001c,
421         0x34, 0x000f0000, 0x000f0000,
422         0x160c, 0xffffffff, 0x00000100,
423         0x1024, 0xffffffff, 0x00000100,
424         0x102c, 0x00000101, 0x00000000,
425         0x20a8, 0xffffffff, 0x00000104,
426         0x264c, 0x000c0000, 0x000c0000,
427         0x2648, 0x000c0000, 0x000c0000,
428         0x55e4, 0xff000fff, 0x00000100,
429         0x55e8, 0x00000001, 0x00000001,
430         0x2f50, 0x00000001, 0x00000001,
431         0x30cc, 0xc0000fff, 0x00000104,
432         0xc1e4, 0x00000001, 0x00000001,
433         0xd0c0, 0xfffffff0, 0x00000100,
434         0xd8c0, 0xfffffff0, 0x00000100
435 };
436
437 static const u32 pitcairn_mgcg_cgcg_init[] =
438 {
439         0xc400, 0xffffffff, 0xfffffffc,
440         0x802c, 0xffffffff, 0xe0000000,
441         0x9a60, 0xffffffff, 0x00000100,
442         0x92a4, 0xffffffff, 0x00000100,
443         0xc164, 0xffffffff, 0x00000100,
444         0x9774, 0xffffffff, 0x00000100,
445         0x8984, 0xffffffff, 0x06000100,
446         0x8a18, 0xffffffff, 0x00000100,
447         0x92a0, 0xffffffff, 0x00000100,
448         0xc380, 0xffffffff, 0x00000100,
449         0x8b28, 0xffffffff, 0x00000100,
450         0x9144, 0xffffffff, 0x00000100,
451         0x8d88, 0xffffffff, 0x00000100,
452         0x8d8c, 0xffffffff, 0x00000100,
453         0x9030, 0xffffffff, 0x00000100,
454         0x9034, 0xffffffff, 0x00000100,
455         0x9038, 0xffffffff, 0x00000100,
456         0x903c, 0xffffffff, 0x00000100,
457         0xad80, 0xffffffff, 0x00000100,
458         0xac54, 0xffffffff, 0x00000100,
459         0x897c, 0xffffffff, 0x06000100,
460         0x9868, 0xffffffff, 0x00000100,
461         0x9510, 0xffffffff, 0x00000100,
462         0xaf04, 0xffffffff, 0x00000100,
463         0xae04, 0xffffffff, 0x00000100,
464         0x949c, 0xffffffff, 0x00000100,
465         0x802c, 0xffffffff, 0xe0000000,
466         0x9160, 0xffffffff, 0x00010000,
467         0x9164, 0xffffffff, 0x00030002,
468         0x9168, 0xffffffff, 0x00040007,
469         0x916c, 0xffffffff, 0x00060005,
470         0x9170, 0xffffffff, 0x00090008,
471         0x9174, 0xffffffff, 0x00020001,
472         0x9178, 0xffffffff, 0x00040003,
473         0x917c, 0xffffffff, 0x00000007,
474         0x9180, 0xffffffff, 0x00060005,
475         0x9184, 0xffffffff, 0x00090008,
476         0x9188, 0xffffffff, 0x00030002,
477         0x918c, 0xffffffff, 0x00050004,
478         0x9190, 0xffffffff, 0x00000008,
479         0x9194, 0xffffffff, 0x00070006,
480         0x9198, 0xffffffff, 0x000a0009,
481         0x919c, 0xffffffff, 0x00040003,
482         0x91a0, 0xffffffff, 0x00060005,
483         0x91a4, 0xffffffff, 0x00000009,
484         0x91a8, 0xffffffff, 0x00080007,
485         0x91ac, 0xffffffff, 0x000b000a,
486         0x91b0, 0xffffffff, 0x00050004,
487         0x91b4, 0xffffffff, 0x00070006,
488         0x91b8, 0xffffffff, 0x0008000b,
489         0x91bc, 0xffffffff, 0x000a0009,
490         0x91c0, 0xffffffff, 0x000d000c,
491         0x9200, 0xffffffff, 0x00090008,
492         0x9204, 0xffffffff, 0x000b000a,
493         0x9208, 0xffffffff, 0x000c000f,
494         0x920c, 0xffffffff, 0x000e000d,
495         0x9210, 0xffffffff, 0x00110010,
496         0x9214, 0xffffffff, 0x000a0009,
497         0x9218, 0xffffffff, 0x000c000b,
498         0x921c, 0xffffffff, 0x0000000f,
499         0x9220, 0xffffffff, 0x000e000d,
500         0x9224, 0xffffffff, 0x00110010,
501         0x9228, 0xffffffff, 0x000b000a,
502         0x922c, 0xffffffff, 0x000d000c,
503         0x9230, 0xffffffff, 0x00000010,
504         0x9234, 0xffffffff, 0x000f000e,
505         0x9238, 0xffffffff, 0x00120011,
506         0x923c, 0xffffffff, 0x000c000b,
507         0x9240, 0xffffffff, 0x000e000d,
508         0x9244, 0xffffffff, 0x00000011,
509         0x9248, 0xffffffff, 0x0010000f,
510         0x924c, 0xffffffff, 0x00130012,
511         0x9250, 0xffffffff, 0x000d000c,
512         0x9254, 0xffffffff, 0x000f000e,
513         0x9258, 0xffffffff, 0x00100013,
514         0x925c, 0xffffffff, 0x00120011,
515         0x9260, 0xffffffff, 0x00150014,
516         0x9150, 0xffffffff, 0x96940200,
517         0x8708, 0xffffffff, 0x00900100,
518         0xc478, 0xffffffff, 0x00000080,
519         0xc404, 0xffffffff, 0x0020003f,
520         0x30, 0xffffffff, 0x0000001c,
521         0x34, 0x000f0000, 0x000f0000,
522         0x160c, 0xffffffff, 0x00000100,
523         0x1024, 0xffffffff, 0x00000100,
524         0x102c, 0x00000101, 0x00000000,
525         0x20a8, 0xffffffff, 0x00000104,
526         0x55e4, 0xff000fff, 0x00000100,
527         0x55e8, 0x00000001, 0x00000001,
528         0x2f50, 0x00000001, 0x00000001,
529         0x30cc, 0xc0000fff, 0x00000104,
530         0xc1e4, 0x00000001, 0x00000001,
531         0xd0c0, 0xfffffff0, 0x00000100,
532         0xd8c0, 0xfffffff0, 0x00000100
533 };
534
535 static const u32 verde_mgcg_cgcg_init[] =
536 {
537         0xc400, 0xffffffff, 0xfffffffc,
538         0x802c, 0xffffffff, 0xe0000000,
539         0x9a60, 0xffffffff, 0x00000100,
540         0x92a4, 0xffffffff, 0x00000100,
541         0xc164, 0xffffffff, 0x00000100,
542         0x9774, 0xffffffff, 0x00000100,
543         0x8984, 0xffffffff, 0x06000100,
544         0x8a18, 0xffffffff, 0x00000100,
545         0x92a0, 0xffffffff, 0x00000100,
546         0xc380, 0xffffffff, 0x00000100,
547         0x8b28, 0xffffffff, 0x00000100,
548         0x9144, 0xffffffff, 0x00000100,
549         0x8d88, 0xffffffff, 0x00000100,
550         0x8d8c, 0xffffffff, 0x00000100,
551         0x9030, 0xffffffff, 0x00000100,
552         0x9034, 0xffffffff, 0x00000100,
553         0x9038, 0xffffffff, 0x00000100,
554         0x903c, 0xffffffff, 0x00000100,
555         0xad80, 0xffffffff, 0x00000100,
556         0xac54, 0xffffffff, 0x00000100,
557         0x897c, 0xffffffff, 0x06000100,
558         0x9868, 0xffffffff, 0x00000100,
559         0x9510, 0xffffffff, 0x00000100,
560         0xaf04, 0xffffffff, 0x00000100,
561         0xae04, 0xffffffff, 0x00000100,
562         0x949c, 0xffffffff, 0x00000100,
563         0x802c, 0xffffffff, 0xe0000000,
564         0x9160, 0xffffffff, 0x00010000,
565         0x9164, 0xffffffff, 0x00030002,
566         0x9168, 0xffffffff, 0x00040007,
567         0x916c, 0xffffffff, 0x00060005,
568         0x9170, 0xffffffff, 0x00090008,
569         0x9174, 0xffffffff, 0x00020001,
570         0x9178, 0xffffffff, 0x00040003,
571         0x917c, 0xffffffff, 0x00000007,
572         0x9180, 0xffffffff, 0x00060005,
573         0x9184, 0xffffffff, 0x00090008,
574         0x9188, 0xffffffff, 0x00030002,
575         0x918c, 0xffffffff, 0x00050004,
576         0x9190, 0xffffffff, 0x00000008,
577         0x9194, 0xffffffff, 0x00070006,
578         0x9198, 0xffffffff, 0x000a0009,
579         0x919c, 0xffffffff, 0x00040003,
580         0x91a0, 0xffffffff, 0x00060005,
581         0x91a4, 0xffffffff, 0x00000009,
582         0x91a8, 0xffffffff, 0x00080007,
583         0x91ac, 0xffffffff, 0x000b000a,
584         0x91b0, 0xffffffff, 0x00050004,
585         0x91b4, 0xffffffff, 0x00070006,
586         0x91b8, 0xffffffff, 0x0008000b,
587         0x91bc, 0xffffffff, 0x000a0009,
588         0x91c0, 0xffffffff, 0x000d000c,
589         0x9200, 0xffffffff, 0x00090008,
590         0x9204, 0xffffffff, 0x000b000a,
591         0x9208, 0xffffffff, 0x000c000f,
592         0x920c, 0xffffffff, 0x000e000d,
593         0x9210, 0xffffffff, 0x00110010,
594         0x9214, 0xffffffff, 0x000a0009,
595         0x9218, 0xffffffff, 0x000c000b,
596         0x921c, 0xffffffff, 0x0000000f,
597         0x9220, 0xffffffff, 0x000e000d,
598         0x9224, 0xffffffff, 0x00110010,
599         0x9228, 0xffffffff, 0x000b000a,
600         0x922c, 0xffffffff, 0x000d000c,
601         0x9230, 0xffffffff, 0x00000010,
602         0x9234, 0xffffffff, 0x000f000e,
603         0x9238, 0xffffffff, 0x00120011,
604         0x923c, 0xffffffff, 0x000c000b,
605         0x9240, 0xffffffff, 0x000e000d,
606         0x9244, 0xffffffff, 0x00000011,
607         0x9248, 0xffffffff, 0x0010000f,
608         0x924c, 0xffffffff, 0x00130012,
609         0x9250, 0xffffffff, 0x000d000c,
610         0x9254, 0xffffffff, 0x000f000e,
611         0x9258, 0xffffffff, 0x00100013,
612         0x925c, 0xffffffff, 0x00120011,
613         0x9260, 0xffffffff, 0x00150014,
614         0x9150, 0xffffffff, 0x96940200,
615         0x8708, 0xffffffff, 0x00900100,
616         0xc478, 0xffffffff, 0x00000080,
617         0xc404, 0xffffffff, 0x0020003f,
618         0x30, 0xffffffff, 0x0000001c,
619         0x34, 0x000f0000, 0x000f0000,
620         0x160c, 0xffffffff, 0x00000100,
621         0x1024, 0xffffffff, 0x00000100,
622         0x102c, 0x00000101, 0x00000000,
623         0x20a8, 0xffffffff, 0x00000104,
624         0x264c, 0x000c0000, 0x000c0000,
625         0x2648, 0x000c0000, 0x000c0000,
626         0x55e4, 0xff000fff, 0x00000100,
627         0x55e8, 0x00000001, 0x00000001,
628         0x2f50, 0x00000001, 0x00000001,
629         0x30cc, 0xc0000fff, 0x00000104,
630         0xc1e4, 0x00000001, 0x00000001,
631         0xd0c0, 0xfffffff0, 0x00000100,
632         0xd8c0, 0xfffffff0, 0x00000100
633 };
634
635 static const u32 oland_mgcg_cgcg_init[] =
636 {
637         0xc400, 0xffffffff, 0xfffffffc,
638         0x802c, 0xffffffff, 0xe0000000,
639         0x9a60, 0xffffffff, 0x00000100,
640         0x92a4, 0xffffffff, 0x00000100,
641         0xc164, 0xffffffff, 0x00000100,
642         0x9774, 0xffffffff, 0x00000100,
643         0x8984, 0xffffffff, 0x06000100,
644         0x8a18, 0xffffffff, 0x00000100,
645         0x92a0, 0xffffffff, 0x00000100,
646         0xc380, 0xffffffff, 0x00000100,
647         0x8b28, 0xffffffff, 0x00000100,
648         0x9144, 0xffffffff, 0x00000100,
649         0x8d88, 0xffffffff, 0x00000100,
650         0x8d8c, 0xffffffff, 0x00000100,
651         0x9030, 0xffffffff, 0x00000100,
652         0x9034, 0xffffffff, 0x00000100,
653         0x9038, 0xffffffff, 0x00000100,
654         0x903c, 0xffffffff, 0x00000100,
655         0xad80, 0xffffffff, 0x00000100,
656         0xac54, 0xffffffff, 0x00000100,
657         0x897c, 0xffffffff, 0x06000100,
658         0x9868, 0xffffffff, 0x00000100,
659         0x9510, 0xffffffff, 0x00000100,
660         0xaf04, 0xffffffff, 0x00000100,
661         0xae04, 0xffffffff, 0x00000100,
662         0x949c, 0xffffffff, 0x00000100,
663         0x802c, 0xffffffff, 0xe0000000,
664         0x9160, 0xffffffff, 0x00010000,
665         0x9164, 0xffffffff, 0x00030002,
666         0x9168, 0xffffffff, 0x00040007,
667         0x916c, 0xffffffff, 0x00060005,
668         0x9170, 0xffffffff, 0x00090008,
669         0x9174, 0xffffffff, 0x00020001,
670         0x9178, 0xffffffff, 0x00040003,
671         0x917c, 0xffffffff, 0x00000007,
672         0x9180, 0xffffffff, 0x00060005,
673         0x9184, 0xffffffff, 0x00090008,
674         0x9188, 0xffffffff, 0x00030002,
675         0x918c, 0xffffffff, 0x00050004,
676         0x9190, 0xffffffff, 0x00000008,
677         0x9194, 0xffffffff, 0x00070006,
678         0x9198, 0xffffffff, 0x000a0009,
679         0x919c, 0xffffffff, 0x00040003,
680         0x91a0, 0xffffffff, 0x00060005,
681         0x91a4, 0xffffffff, 0x00000009,
682         0x91a8, 0xffffffff, 0x00080007,
683         0x91ac, 0xffffffff, 0x000b000a,
684         0x91b0, 0xffffffff, 0x00050004,
685         0x91b4, 0xffffffff, 0x00070006,
686         0x91b8, 0xffffffff, 0x0008000b,
687         0x91bc, 0xffffffff, 0x000a0009,
688         0x91c0, 0xffffffff, 0x000d000c,
689         0x91c4, 0xffffffff, 0x00060005,
690         0x91c8, 0xffffffff, 0x00080007,
691         0x91cc, 0xffffffff, 0x0000000b,
692         0x91d0, 0xffffffff, 0x000a0009,
693         0x91d4, 0xffffffff, 0x000d000c,
694         0x9150, 0xffffffff, 0x96940200,
695         0x8708, 0xffffffff, 0x00900100,
696         0xc478, 0xffffffff, 0x00000080,
697         0xc404, 0xffffffff, 0x0020003f,
698         0x30, 0xffffffff, 0x0000001c,
699         0x34, 0x000f0000, 0x000f0000,
700         0x160c, 0xffffffff, 0x00000100,
701         0x1024, 0xffffffff, 0x00000100,
702         0x102c, 0x00000101, 0x00000000,
703         0x20a8, 0xffffffff, 0x00000104,
704         0x264c, 0x000c0000, 0x000c0000,
705         0x2648, 0x000c0000, 0x000c0000,
706         0x55e4, 0xff000fff, 0x00000100,
707         0x55e8, 0x00000001, 0x00000001,
708         0x2f50, 0x00000001, 0x00000001,
709         0x30cc, 0xc0000fff, 0x00000104,
710         0xc1e4, 0x00000001, 0x00000001,
711         0xd0c0, 0xfffffff0, 0x00000100,
712         0xd8c0, 0xfffffff0, 0x00000100
713 };
714
715 static const u32 hainan_mgcg_cgcg_init[] =
716 {
717         0xc400, 0xffffffff, 0xfffffffc,
718         0x802c, 0xffffffff, 0xe0000000,
719         0x9a60, 0xffffffff, 0x00000100,
720         0x92a4, 0xffffffff, 0x00000100,
721         0xc164, 0xffffffff, 0x00000100,
722         0x9774, 0xffffffff, 0x00000100,
723         0x8984, 0xffffffff, 0x06000100,
724         0x8a18, 0xffffffff, 0x00000100,
725         0x92a0, 0xffffffff, 0x00000100,
726         0xc380, 0xffffffff, 0x00000100,
727         0x8b28, 0xffffffff, 0x00000100,
728         0x9144, 0xffffffff, 0x00000100,
729         0x8d88, 0xffffffff, 0x00000100,
730         0x8d8c, 0xffffffff, 0x00000100,
731         0x9030, 0xffffffff, 0x00000100,
732         0x9034, 0xffffffff, 0x00000100,
733         0x9038, 0xffffffff, 0x00000100,
734         0x903c, 0xffffffff, 0x00000100,
735         0xad80, 0xffffffff, 0x00000100,
736         0xac54, 0xffffffff, 0x00000100,
737         0x897c, 0xffffffff, 0x06000100,
738         0x9868, 0xffffffff, 0x00000100,
739         0x9510, 0xffffffff, 0x00000100,
740         0xaf04, 0xffffffff, 0x00000100,
741         0xae04, 0xffffffff, 0x00000100,
742         0x949c, 0xffffffff, 0x00000100,
743         0x802c, 0xffffffff, 0xe0000000,
744         0x9160, 0xffffffff, 0x00010000,
745         0x9164, 0xffffffff, 0x00030002,
746         0x9168, 0xffffffff, 0x00040007,
747         0x916c, 0xffffffff, 0x00060005,
748         0x9170, 0xffffffff, 0x00090008,
749         0x9174, 0xffffffff, 0x00020001,
750         0x9178, 0xffffffff, 0x00040003,
751         0x917c, 0xffffffff, 0x00000007,
752         0x9180, 0xffffffff, 0x00060005,
753         0x9184, 0xffffffff, 0x00090008,
754         0x9188, 0xffffffff, 0x00030002,
755         0x918c, 0xffffffff, 0x00050004,
756         0x9190, 0xffffffff, 0x00000008,
757         0x9194, 0xffffffff, 0x00070006,
758         0x9198, 0xffffffff, 0x000a0009,
759         0x919c, 0xffffffff, 0x00040003,
760         0x91a0, 0xffffffff, 0x00060005,
761         0x91a4, 0xffffffff, 0x00000009,
762         0x91a8, 0xffffffff, 0x00080007,
763         0x91ac, 0xffffffff, 0x000b000a,
764         0x91b0, 0xffffffff, 0x00050004,
765         0x91b4, 0xffffffff, 0x00070006,
766         0x91b8, 0xffffffff, 0x0008000b,
767         0x91bc, 0xffffffff, 0x000a0009,
768         0x91c0, 0xffffffff, 0x000d000c,
769         0x91c4, 0xffffffff, 0x00060005,
770         0x91c8, 0xffffffff, 0x00080007,
771         0x91cc, 0xffffffff, 0x0000000b,
772         0x91d0, 0xffffffff, 0x000a0009,
773         0x91d4, 0xffffffff, 0x000d000c,
774         0x9150, 0xffffffff, 0x96940200,
775         0x8708, 0xffffffff, 0x00900100,
776         0xc478, 0xffffffff, 0x00000080,
777         0xc404, 0xffffffff, 0x0020003f,
778         0x30, 0xffffffff, 0x0000001c,
779         0x34, 0x000f0000, 0x000f0000,
780         0x160c, 0xffffffff, 0x00000100,
781         0x1024, 0xffffffff, 0x00000100,
782         0x20a8, 0xffffffff, 0x00000104,
783         0x264c, 0x000c0000, 0x000c0000,
784         0x2648, 0x000c0000, 0x000c0000,
785         0x2f50, 0x00000001, 0x00000001,
786         0x30cc, 0xc0000fff, 0x00000104,
787         0xc1e4, 0x00000001, 0x00000001,
788         0xd0c0, 0xfffffff0, 0x00000100,
789         0xd8c0, 0xfffffff0, 0x00000100
790 };
791
/* Power-gating init sequence for Verde, consumed three entries at a time as
 * {register offset, mask, value} by radeon_program_register_sequence()
 * (see si_init_golden_registers).
 * NOTE(review): the 0x35xx offsets look like an indexed data-port style
 * upload (index written to 0x3500/0x3538, payload to 0x3504/0x353c) —
 * confirm the register names against sid.h before touching the values.
 */
static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};
918
919 static void si_init_golden_registers(struct radeon_device *rdev)
920 {
921         switch (rdev->family) {
922         case CHIP_TAHITI:
923                 radeon_program_register_sequence(rdev,
924                                                  tahiti_golden_registers,
925                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
926                 radeon_program_register_sequence(rdev,
927                                                  tahiti_golden_rlc_registers,
928                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
929                 radeon_program_register_sequence(rdev,
930                                                  tahiti_mgcg_cgcg_init,
931                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
932                 radeon_program_register_sequence(rdev,
933                                                  tahiti_golden_registers2,
934                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
935                 break;
936         case CHIP_PITCAIRN:
937                 radeon_program_register_sequence(rdev,
938                                                  pitcairn_golden_registers,
939                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
940                 radeon_program_register_sequence(rdev,
941                                                  pitcairn_golden_rlc_registers,
942                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
943                 radeon_program_register_sequence(rdev,
944                                                  pitcairn_mgcg_cgcg_init,
945                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
946                 break;
947         case CHIP_VERDE:
948                 radeon_program_register_sequence(rdev,
949                                                  verde_golden_registers,
950                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
951                 radeon_program_register_sequence(rdev,
952                                                  verde_golden_rlc_registers,
953                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
954                 radeon_program_register_sequence(rdev,
955                                                  verde_mgcg_cgcg_init,
956                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
957                 radeon_program_register_sequence(rdev,
958                                                  verde_pg_init,
959                                                  (const u32)ARRAY_SIZE(verde_pg_init));
960                 break;
961         case CHIP_OLAND:
962                 radeon_program_register_sequence(rdev,
963                                                  oland_golden_registers,
964                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
965                 radeon_program_register_sequence(rdev,
966                                                  oland_golden_rlc_registers,
967                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
968                 radeon_program_register_sequence(rdev,
969                                                  oland_mgcg_cgcg_init,
970                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
971                 break;
972         case CHIP_HAINAN:
973                 radeon_program_register_sequence(rdev,
974                                                  hainan_golden_registers,
975                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
976                 radeon_program_register_sequence(rdev,
977                                                  hainan_golden_registers2,
978                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
979                 radeon_program_register_sequence(rdev,
980                                                  hainan_mgcg_cgcg_init,
981                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
982                 break;
983         default:
984                 break;
985         }
986 }
987
/* Clock used by si_get_xclk() when the mux routes TCLK to XCLK.
 * NOTE(review): units presumably match the other radeon clock values
 * (10 kHz granularity) - confirm against callers of si_get_xclk(). */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
990
991 /**
992  * si_get_xclk - get the xclk
993  *
994  * @rdev: radeon_device pointer
995  *
996  * Returns the reference clock used by the gfx engine
997  * (SI).
998  */
999 u32 si_get_xclk(struct radeon_device *rdev)
1000 {
1001         u32 reference_clock = rdev->clock.spll.reference_freq;
1002         u32 tmp;
1003
1004         tmp = RREG32(CG_CLKPIN_CNTL_2);
1005         if (tmp & MUX_TCLK_TO_XCLK)
1006                 return TCLK;
1007
1008         tmp = RREG32(CG_CLKPIN_CNTL);
1009         if (tmp & XTALIN_DIVIDE)
1010                 return reference_clock / 4;
1011
1012         return reference_clock;
1013 }
1014
1015 /* get temperature in millidegrees */
1016 int si_get_temp(struct radeon_device *rdev)
1017 {
1018         u32 temp;
1019         int actual_temp = 0;
1020
1021         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1022                 CTF_TEMP_SHIFT;
1023
1024         if (temp & 0x200)
1025                 actual_temp = 255;
1026         else
1027                 actual_temp = temp & 0x1ff;
1028
1029         actual_temp = (actual_temp * 1000);
1030
1031         return actual_temp;
1032 }
1033
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs in each of
 * the *_io_mc_regs tables below; all SI variants share this size. */
#define TAHITI_IO_MC_REGS_SIZE 36

/* MC IO setup for Tahiti, written through the MC_SEQ_IO_DEBUG index/data
 * pair before the MC ucode is loaded (see si_mc_load_microcode). */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1074
/* MC IO setup for Pitcairn (identical to Tahiti except the final entry);
 * see si_mc_load_microcode. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1113
/* MC IO setup for Verde (identical to Tahiti except the final entry);
 * see si_mc_load_microcode. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1152
/* MC IO setup for Oland (identical to Tahiti except the final entry);
 * see si_mc_load_microcode. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1191
/* MC IO setup for Hainan (identical to Tahiti except the final entry);
 * see si_mc_load_microcode. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1230
1231 /* ucode loading */
1232 static int si_mc_load_microcode(struct radeon_device *rdev)
1233 {
1234         const __be32 *fw_data;
1235         u32 running, blackout = 0;
1236         u32 *io_mc_regs;
1237         int i, ucode_size, regs_size;
1238
1239         if (!rdev->mc_fw)
1240                 return -EINVAL;
1241
1242         switch (rdev->family) {
1243         case CHIP_TAHITI:
1244                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1245                 ucode_size = SI_MC_UCODE_SIZE;
1246                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1247                 break;
1248         case CHIP_PITCAIRN:
1249                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1250                 ucode_size = SI_MC_UCODE_SIZE;
1251                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1252                 break;
1253         case CHIP_VERDE:
1254         default:
1255                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1256                 ucode_size = SI_MC_UCODE_SIZE;
1257                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1258                 break;
1259         case CHIP_OLAND:
1260                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1261                 ucode_size = OLAND_MC_UCODE_SIZE;
1262                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1263                 break;
1264         case CHIP_HAINAN:
1265                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1266                 ucode_size = OLAND_MC_UCODE_SIZE;
1267                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1268                 break;
1269         }
1270
1271         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1272
1273         if (running == 0) {
1274                 if (running) {
1275                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1276                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1277                 }
1278
1279                 /* reset the engine and set to writable */
1280                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1281                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1282
1283                 /* load mc io regs */
1284                 for (i = 0; i < regs_size; i++) {
1285                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1286                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1287                 }
1288                 /* load the MC ucode */
1289                 fw_data = (const __be32 *)rdev->mc_fw->data;
1290                 for (i = 0; i < ucode_size; i++)
1291                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1292
1293                 /* put the engine back into the active state */
1294                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1295                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1296                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1297
1298                 /* wait for training to complete */
1299                 for (i = 0; i < rdev->usec_timeout; i++) {
1300                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1301                                 break;
1302                         udelay(1);
1303                 }
1304                 for (i = 0; i < rdev->usec_timeout; i++) {
1305                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1306                                 break;
1307                         udelay(1);
1308                 }
1309
1310                 if (running)
1311                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1312         }
1313
1314         return 0;
1315 }
1316
1317 static int si_init_microcode(struct radeon_device *rdev)
1318 {
1319         struct platform_device *pdev;
1320         const char *chip_name;
1321         const char *rlc_chip_name;
1322         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1323         char fw_name[30];
1324         int err;
1325
1326         DRM_DEBUG("\n");
1327
1328         pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1329         err = IS_ERR(pdev);
1330         if (err) {
1331                 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1332                 return -EINVAL;
1333         }
1334
1335         switch (rdev->family) {
1336         case CHIP_TAHITI:
1337                 chip_name = "TAHITI";
1338                 rlc_chip_name = "TAHITI";
1339                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1340                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1341                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1342                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1343                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1344                 break;
1345         case CHIP_PITCAIRN:
1346                 chip_name = "PITCAIRN";
1347                 rlc_chip_name = "PITCAIRN";
1348                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1349                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1350                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1351                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1352                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1353                 break;
1354         case CHIP_VERDE:
1355                 chip_name = "VERDE";
1356                 rlc_chip_name = "VERDE";
1357                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1358                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1359                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1360                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1361                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1362                 break;
1363         case CHIP_OLAND:
1364                 chip_name = "OLAND";
1365                 rlc_chip_name = "OLAND";
1366                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1367                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1368                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1369                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1370                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1371                 break;
1372         case CHIP_HAINAN:
1373                 chip_name = "HAINAN";
1374                 rlc_chip_name = "HAINAN";
1375                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1376                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1377                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1378                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1379                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1380                 break;
1381         default: BUG();
1382         }
1383
1384         DRM_INFO("Loading %s Microcode\n", chip_name);
1385
1386         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1387         err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1388         if (err)
1389                 goto out;
1390         if (rdev->pfp_fw->size != pfp_req_size) {
1391                 printk(KERN_ERR
1392                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1393                        rdev->pfp_fw->size, fw_name);
1394                 err = -EINVAL;
1395                 goto out;
1396         }
1397
1398         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1399         err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1400         if (err)
1401                 goto out;
1402         if (rdev->me_fw->size != me_req_size) {
1403                 printk(KERN_ERR
1404                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1405                        rdev->me_fw->size, fw_name);
1406                 err = -EINVAL;
1407         }
1408
1409         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1410         err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1411         if (err)
1412                 goto out;
1413         if (rdev->ce_fw->size != ce_req_size) {
1414                 printk(KERN_ERR
1415                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1416                        rdev->ce_fw->size, fw_name);
1417                 err = -EINVAL;
1418         }
1419
1420         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1421         err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1422         if (err)
1423                 goto out;
1424         if (rdev->rlc_fw->size != rlc_req_size) {
1425                 printk(KERN_ERR
1426                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1427                        rdev->rlc_fw->size, fw_name);
1428                 err = -EINVAL;
1429         }
1430
1431         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1432         err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1433         if (err)
1434                 goto out;
1435         if (rdev->mc_fw->size != mc_req_size) {
1436                 printk(KERN_ERR
1437                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1438                        rdev->mc_fw->size, fw_name);
1439                 err = -EINVAL;
1440         }
1441
1442 out:
1443         platform_device_unregister(pdev);
1444
1445         if (err) {
1446                 if (err != -EINVAL)
1447                         printk(KERN_ERR
1448                                "si_cp: Failed to load firmware \"%s\"\n",
1449                                fw_name);
1450                 release_firmware(rdev->pfp_fw);
1451                 rdev->pfp_fw = NULL;
1452                 release_firmware(rdev->me_fw);
1453                 rdev->me_fw = NULL;
1454                 release_firmware(rdev->ce_fw);
1455                 rdev->ce_fw = NULL;
1456                 release_firmware(rdev->rlc_fw);
1457                 rdev->rlc_fw = NULL;
1458                 release_firmware(rdev->mc_fw);
1459                 rdev->mc_fw = NULL;
1460         }
1461         return err;
1462 }
1463
/* watermark setup */
/**
 * dce6_line_buffer_adjust - configure the line buffer split for one crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: crtc whose line buffer share is being configured
 * @mode: mode on this crtc (NULL/disabled means no lb is needed)
 * @other_mode: mode on the paired crtc sharing this line buffer, if any
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the crtc,
 * then polls until the hardware acknowledges the DMIF allocation (bounded
 * by rdev->usec_timeout microseconds).
 *
 * Returns the line buffer allocation granted to this crtc, or 0 when the
 * crtc is disabled.  NOTE(review): the 4096*2 / 8192*2 return values are
 * presumably the lb capacity units consumed by the watermark code - confirm
 * against the callers, which are outside this chunk.
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* disabled crtc: give it no lb and no DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait (bounded) for the hardware to latch the DMIF allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1523
1524 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1525 {
1526         u32 tmp = RREG32(MC_SHARED_CHMAP);
1527
1528         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1529         case 0:
1530         default:
1531                 return 1;
1532         case 1:
1533                 return 2;
1534         case 2:
1535                 return 4;
1536         case 3:
1537                 return 8;
1538         case 4:
1539                 return 3;
1540         case 5:
1541                 return 6;
1542         case 6:
1543                 return 10;
1544         case 7:
1545                 return 12;
1546         case 8:
1547                 return 16;
1548         }
1549 }
1550
/* Input parameters for the DCE6 display watermark calculations below.
 * Filled in by dce6_program_watermarks() from the current mode and clocks.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1566
1567 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1568 {
1569         /* Calculate raw DRAM Bandwidth */
1570         fixed20_12 dram_efficiency; /* 0.7 */
1571         fixed20_12 yclk, dram_channels, bandwidth;
1572         fixed20_12 a;
1573
1574         a.full = dfixed_const(1000);
1575         yclk.full = dfixed_const(wm->yclk);
1576         yclk.full = dfixed_div(yclk, a);
1577         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1578         a.full = dfixed_const(10);
1579         dram_efficiency.full = dfixed_const(7);
1580         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1581         bandwidth.full = dfixed_mul(dram_channels, yclk);
1582         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1583
1584         return dfixed_trunc(bandwidth);
1585 }
1586
1587 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1588 {
1589         /* Calculate DRAM Bandwidth and the part allocated to display. */
1590         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1591         fixed20_12 yclk, dram_channels, bandwidth;
1592         fixed20_12 a;
1593
1594         a.full = dfixed_const(1000);
1595         yclk.full = dfixed_const(wm->yclk);
1596         yclk.full = dfixed_div(yclk, a);
1597         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1598         a.full = dfixed_const(10);
1599         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1600         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1601         bandwidth.full = dfixed_mul(dram_channels, yclk);
1602         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1603
1604         return dfixed_trunc(bandwidth);
1605 }
1606
1607 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1608 {
1609         /* Calculate the display Data return Bandwidth */
1610         fixed20_12 return_efficiency; /* 0.8 */
1611         fixed20_12 sclk, bandwidth;
1612         fixed20_12 a;
1613
1614         a.full = dfixed_const(1000);
1615         sclk.full = dfixed_const(wm->sclk);
1616         sclk.full = dfixed_div(sclk, a);
1617         a.full = dfixed_const(10);
1618         return_efficiency.full = dfixed_const(8);
1619         return_efficiency.full = dfixed_div(return_efficiency, a);
1620         a.full = dfixed_const(32);
1621         bandwidth.full = dfixed_mul(a, sclk);
1622         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1623
1624         return dfixed_trunc(bandwidth);
1625 }
1626
1627 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1628 {
1629         return 32;
1630 }
1631
1632 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1633 {
1634         /* Calculate the DMIF Request Bandwidth */
1635         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1636         fixed20_12 disp_clk, sclk, bandwidth;
1637         fixed20_12 a, b1, b2;
1638         u32 min_bandwidth;
1639
1640         a.full = dfixed_const(1000);
1641         disp_clk.full = dfixed_const(wm->disp_clk);
1642         disp_clk.full = dfixed_div(disp_clk, a);
1643         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1644         b1.full = dfixed_mul(a, disp_clk);
1645
1646         a.full = dfixed_const(1000);
1647         sclk.full = dfixed_const(wm->sclk);
1648         sclk.full = dfixed_div(sclk, a);
1649         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1650         b2.full = dfixed_mul(a, sclk);
1651
1652         a.full = dfixed_const(10);
1653         disp_clk_request_efficiency.full = dfixed_const(8);
1654         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1655
1656         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1657
1658         a.full = dfixed_const(min_bandwidth);
1659         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1660
1661         return dfixed_trunc(bandwidth);
1662 }
1663
1664 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1665 {
1666         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1667         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1668         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1669         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1670
1671         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1672 }
1673
1674 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1675 {
1676         /* Calculate the display mode Average Bandwidth
1677          * DisplayMode should contain the source and destination dimensions,
1678          * timing, etc.
1679          */
1680         fixed20_12 bpp;
1681         fixed20_12 line_time;
1682         fixed20_12 src_width;
1683         fixed20_12 bandwidth;
1684         fixed20_12 a;
1685
1686         a.full = dfixed_const(1000);
1687         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1688         line_time.full = dfixed_div(line_time, a);
1689         bpp.full = dfixed_const(wm->bytes_per_pixel);
1690         src_width.full = dfixed_const(wm->src_width);
1691         bandwidth.full = dfixed_mul(src_width, bpp);
1692         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1693         bandwidth.full = dfixed_div(bandwidth, line_time);
1694
1695         return dfixed_trunc(bandwidth);
1696 }
1697
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* NOTE(review): the divisions below run before the num_heads guard
	 * and assume available_bandwidth and disp_clk are non-zero (clocks
	 * set up, crtc enabled) - confirm against callers. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* strong downscaling, many scaler taps, or interlace can require up
	 * to 4 source lines to produce one output line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif-size-limited rate: dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* the line buffer also cannot be filled faster than
	 * disp_clk * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch all source lines needed for one output line */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if filling one line takes longer than the active period, the
	 * overrun adds to the latency the hardware must tolerate */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
1760
1761 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1762 {
1763         if (dce6_average_bandwidth(wm) <=
1764             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1765                 return true;
1766         else
1767                 return false;
1768 };
1769
1770 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1771 {
1772         if (dce6_average_bandwidth(wm) <=
1773             (dce6_available_bandwidth(wm) / wm->num_heads))
1774                 return true;
1775         else
1776                 return false;
1777 };
1778
1779 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1780 {
1781         u32 lb_partitions = wm->lb_size / wm->src_width;
1782         u32 line_time = wm->active_time + wm->blank_time;
1783         u32 latency_tolerant_lines;
1784         u32 latency_hiding;
1785         fixed20_12 a;
1786
1787         a.full = dfixed_const(1);
1788         if (wm->vsc.full > a.full)
1789                 latency_tolerant_lines = 1;
1790         else {
1791                 if (lb_partitions <= (wm->vtaps + 1))
1792                         latency_tolerant_lines = 1;
1793                 else
1794                         latency_tolerant_lines = 2;
1795         }
1796
1797         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1798
1799         if (dce6_latency_watermark(wm) <= latency_hiding)
1800                 return true;
1801         else
1802                 return false;
1803 }
1804
/* Program the display watermarks (latency watermarks A/B and the display
 * priority marks) for one crtc, based on the current mode, clocks and the
 * number of active heads.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so pixel_period is in ns */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		/* fill in the watermark parameters from the current state */
		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		/* NOTE(review): the low clocks are never actually written
		 * into wm here, so watermark b comes out identical to
		 * watermark a - confirm whether that is intended. */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * pixel clock * hsc / 16000 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

}
1914
1915 void dce6_bandwidth_update(struct radeon_device *rdev)
1916 {
1917         struct drm_display_mode *mode0 = NULL;
1918         struct drm_display_mode *mode1 = NULL;
1919         u32 num_heads = 0, lb_size;
1920         int i;
1921
1922         radeon_update_display_priority(rdev);
1923
1924         for (i = 0; i < rdev->num_crtc; i++) {
1925                 if (rdev->mode_info.crtcs[i]->base.enabled)
1926                         num_heads++;
1927         }
1928         for (i = 0; i < rdev->num_crtc; i += 2) {
1929                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1930                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1931                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1932                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1933                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1934                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1935         }
1936 }
1937
1938 /*
1939  * Core functions
1940  */
1941 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1942 {
1943         const u32 num_tile_mode_states = 32;
1944         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1945
1946         switch (rdev->config.si.mem_row_size_in_kb) {
1947         case 1:
1948                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1949                 break;
1950         case 2:
1951         default:
1952                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1953                 break;
1954         case 4:
1955                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1956                 break;
1957         }
1958
1959         if ((rdev->family == CHIP_TAHITI) ||
1960             (rdev->family == CHIP_PITCAIRN)) {
1961                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1962                         switch (reg_offset) {
1963                         case 0:  /* non-AA compressed depth or any compressed stencil */
1964                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1965                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1966                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1967                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1968                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1969                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1971                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1972                                 break;
1973                         case 1:  /* 2xAA/4xAA compressed depth only */
1974                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1975                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1976                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1977                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1978                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1979                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1980                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1981                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1982                                 break;
1983                         case 2:  /* 8xAA compressed depth only */
1984                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1985                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1986                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1987                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1988                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1989                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1991                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1992                                 break;
1993                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1994                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1995                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1996                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1997                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1998                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
1999                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2000                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2001                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2002                                 break;
2003                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2004                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2005                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2006                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2007                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2008                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2009                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2012                                 break;
2013                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2014                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2015                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2016                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2017                                                  TILE_SPLIT(split_equal_to_row_size) |
2018                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2019                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2020                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2021                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2022                                 break;
2023                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size) |
2028                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2029                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2030                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2031                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2032                                 break;
2033                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2037                                                  TILE_SPLIT(split_equal_to_row_size) |
2038                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2039                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2040                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2041                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2042                                 break;
2043                         case 8:  /* 1D and 1D Array Surfaces */
2044                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2045                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2047                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2048                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2049                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2050                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2051                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2052                                 break;
2053                         case 9:  /* Displayable maps. */
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2057                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2058                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2059                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2060                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2061                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2062                                 break;
2063                         case 10:  /* Display 8bpp. */
2064                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2067                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2068                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2069                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2070                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2071                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2072                                 break;
2073                         case 11:  /* Display 16bpp. */
2074                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2077                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2078                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2079                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2080                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2081                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2082                                 break;
2083                         case 12:  /* Display 32bpp. */
2084                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2086                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2087                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2088                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2089                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2090                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2091                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2092                                 break;
2093                         case 13:  /* Thin. */
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2096                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2097                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2098                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2099                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2102                                 break;
2103                         case 14:  /* Thin 8 bpp. */
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2107                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2108                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2109                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2110                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2111                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2112                                 break;
2113                         case 15:  /* Thin 16 bpp. */
2114                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2116                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2117                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2118                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2119                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2120                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2121                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2122                                 break;
2123                         case 16:  /* Thin 32 bpp. */
2124                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2126                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2127                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2129                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2132                                 break;
2133                         case 17:  /* Thin 64 bpp. */
2134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2136                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2137                                                  TILE_SPLIT(split_equal_to_row_size) |
2138                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2139                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2140                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2141                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2142                                 break;
2143                         case 21:  /* 8 bpp PRT. */
2144                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2146                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2147                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2148                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2149                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2152                                 break;
2153                         case 22:  /* 16 bpp PRT */
2154                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2156                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2157                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2158                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2159                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2162                                 break;
2163                         case 23:  /* 32 bpp PRT */
2164                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2166                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2167                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2168                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2169                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2171                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2172                                 break;
2173                         case 24:  /* 64 bpp PRT */
2174                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2175                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2176                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2177                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2178                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2179                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2182                                 break;
2183                         case 25:  /* 128 bpp PRT */
2184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2186                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2187                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2189                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2192                                 break;
2193                         default:
2194                                 gb_tile_moden = 0;
2195                                 break;
2196                         }
2197                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2198                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2199                 }
2200         } else if ((rdev->family == CHIP_VERDE) ||
2201                    (rdev->family == CHIP_OLAND) ||
2202                    (rdev->family == CHIP_HAINAN)) {
2203                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2204                         switch (reg_offset) {
2205                         case 0:  /* non-AA compressed depth or any compressed stencil */
2206                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2208                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2209                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2210                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2211                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2213                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2214                                 break;
2215                         case 1:  /* 2xAA/4xAA compressed depth only */
2216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2218                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2219                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2220                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2221                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2222                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2224                                 break;
2225                         case 2:  /* 8xAA compressed depth only */
2226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2227                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2228                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2229                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2230                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2231                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2234                                 break;
2235                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2240                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2241                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2244                                 break;
2245                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2246                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2248                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2249                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2250                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2251                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2254                                 break;
2255                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2256                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2258                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2259                                                  TILE_SPLIT(split_equal_to_row_size) |
2260                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2261                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2264                                 break;
2265                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2268                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2269                                                  TILE_SPLIT(split_equal_to_row_size) |
2270                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2271                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2274                                 break;
2275                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2276                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2278                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2279                                                  TILE_SPLIT(split_equal_to_row_size) |
2280                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2281                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2284                                 break;
2285                         case 8:  /* 1D and 1D Array Surfaces */
2286                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2287                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2288                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2289                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2290                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2291                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2294                                 break;
2295                         case 9:  /* Displayable maps. */
2296                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2298                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2299                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2300                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2301                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2303                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2304                                 break;
2305                         case 10:  /* Display 8bpp. */
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2309                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2310                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2311                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2314                                 break;
2315                         case 11:  /* Display 16bpp. */
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2319                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2320                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2321                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2324                                 break;
2325                         case 12:  /* Display 32bpp. */
2326                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2329                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2330                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2331                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2333                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2334                                 break;
2335                         case 13:  /* Thin. */
2336                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2337                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2338                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2339                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2340                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2341                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2343                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2344                                 break;
2345                         case 14:  /* Thin 8 bpp. */
2346                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2348                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2349                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2350                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2351                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2353                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2354                                 break;
2355                         case 15:  /* Thin 16 bpp. */
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2359                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2360                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2361                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2364                                 break;
2365                         case 16:  /* Thin 32 bpp. */
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2368                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2369                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2370                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2371                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2374                                 break;
2375                         case 17:  /* Thin 64 bpp. */
2376                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2377                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2378                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2379                                                  TILE_SPLIT(split_equal_to_row_size) |
2380                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2381                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2383                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2384                                 break;
2385                         case 21:  /* 8 bpp PRT. */
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2390                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2391                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2394                                 break;
2395                         case 22:  /* 16 bpp PRT */
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2398                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2399                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2401                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2403                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2404                                 break;
2405                         case 23:  /* 32 bpp PRT */
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2409                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2411                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2413                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2414                                 break;
2415                         case 24:  /* 64 bpp PRT */
2416                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2418                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2419                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2420                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2421                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2424                                 break;
2425                         case 25:  /* 128 bpp PRT */
2426                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2428                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2429                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2430                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2431                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2434                                 break;
2435                         default:
2436                                 gb_tile_moden = 0;
2437                                 break;
2438                         }
2439                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2440                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2441                 }
2442         } else
2443                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2444 }
2445
2446 static void si_select_se_sh(struct radeon_device *rdev,
2447                             u32 se_num, u32 sh_num)
2448 {
2449         u32 data = INSTANCE_BROADCAST_WRITES;
2450
2451         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2452                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2453         else if (se_num == 0xffffffff)
2454                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2455         else if (sh_num == 0xffffffff)
2456                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2457         else
2458                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2459         WREG32(GRBM_GFX_INDEX, data);
2460 }
2461
/*
 * si_create_bitmask - build a mask with the low @bit_width bits set.
 *
 * E.g. bit_width == 3 returns 0x7; bit_width == 0 returns 0.
 * A width of 32 yields 0xffffffff without relying on an undefined
 * full-width shift.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 mask = 0;
	u32 bit;

	for (bit = 0; bit < bit_width; bit++)
		mask = (mask << 1) | 1;

	return mask;
}
2472
2473 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2474 {
2475         u32 data, mask;
2476
2477         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2478         if (data & 1)
2479                 data &= INACTIVE_CUS_MASK;
2480         else
2481                 data = 0;
2482         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2483
2484         data >>= INACTIVE_CUS_SHIFT;
2485
2486         mask = si_create_bitmask(cu_per_sh);
2487
2488         return ~data & mask;
2489 }
2490
2491 static void si_setup_spi(struct radeon_device *rdev,
2492                          u32 se_num, u32 sh_per_se,
2493                          u32 cu_per_sh)
2494 {
2495         int i, j, k;
2496         u32 data, mask, active_cu;
2497
2498         for (i = 0; i < se_num; i++) {
2499                 for (j = 0; j < sh_per_se; j++) {
2500                         si_select_se_sh(rdev, i, j);
2501                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2502                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2503
2504                         mask = 1;
2505                         for (k = 0; k < 16; k++) {
2506                                 mask <<= k;
2507                                 if (active_cu & mask) {
2508                                         data &= ~mask;
2509                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2510                                         break;
2511                                 }
2512                         }
2513                 }
2514         }
2515         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2516 }
2517
2518 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2519                               u32 max_rb_num, u32 se_num,
2520                               u32 sh_per_se)
2521 {
2522         u32 data, mask;
2523
2524         data = RREG32(CC_RB_BACKEND_DISABLE);
2525         if (data & 1)
2526                 data &= BACKEND_DISABLE_MASK;
2527         else
2528                 data = 0;
2529         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2530
2531         data >>= BACKEND_DISABLE_SHIFT;
2532
2533         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2534
2535         return data & mask;
2536 }
2537
/*
 * si_setup_rb - program PA_SC_RASTER_CONFIG from the RB harvest state.
 *
 * Builds a global bitmap of disabled render backends across all shader
 * engines/arrays, inverts it into an enabled-RB bitmap, and then writes
 * a per-SE raster config where each shader array contributes a 2-bit
 * RB_MAP field chosen from the enabled pattern.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather the per-array disable bitmaps into one global bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: a bit set in enabled_rbs means that RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Per shader engine, pack one 2-bit RB_MAP selector per shader
	 * array into PA_SC_RASTER_CONFIG, consuming enabled_rbs two
	 * bits at a time. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (also the fallback) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2585
/**
 * si_gpu_init - bring up the SI graphics core
 * @rdev: radeon_device pointer
 *
 * Selects the per-ASIC shader/backend limits, clears the HDP tile
 * registers, derives the memory row-size and tiling configuration from
 * the MC strap registers, programs the various *_ADDR_CONFIG registers,
 * sets up the raster backends and SPI, and finally applies the HW
 * defaults for the 3D engine.  Register write order follows the
 * hardware bring-up sequence and must not be reordered.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = 0;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 sx_debug_1;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* per-ASIC limits; unknown families take the Verde values
         * via the default label below */
        switch (rdev->family) {
        case CHIP_TAHITI:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 12;
                rdev->config.si.max_cu_per_sh = 8;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 12;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_PITCAIRN:
                rdev->config.si.max_shader_engines = 2;
                rdev->config.si.max_tile_pipes = 8;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 8;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x100;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_VERDE:
        default:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 2;
                rdev->config.si.max_backends_per_se = 4;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 32;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_OLAND:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 6;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 2;
                rdev->config.si.max_texture_channel_caches = 4;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAINAN:
                rdev->config.si.max_shader_engines = 1;
                rdev->config.si.max_tile_pipes = 4;
                rdev->config.si.max_cu_per_sh = 5;
                rdev->config.si.max_sh_per_se = 1;
                rdev->config.si.max_backends_per_se = 1;
                rdev->config.si.max_texture_channel_caches = 2;
                rdev->config.si.max_gprs = 256;
                rdev->config.si.max_gs_threads = 16;
                rdev->config.si.max_hw_contexts = 8;

                rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.si.sc_prim_fifo_size_backend = 0x40;
                rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        evergreen_fix_pci_max_read_req_size(rdev);

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* NOTE(review): mc_shared_chmap is read but never used in this
         * function — presumably kept for parity with other ASIC init
         * paths; confirm before removing */
        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
        rdev->config.si.mem_max_burst_length_bytes = 256;
        /* derive DRAM row size (in KB) from the MC column-count strap,
         * clamped to 4KB */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.si.mem_row_size_in_kb > 4)
                rdev->config.si.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.si.shader_engine_tile_size = 32;
        rdev->config.si.num_gpus = 1;
        rdev->config.si.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.si.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.si.tile_config = 0;
        switch (rdev->config.si.num_tile_pipes) {
        case 1:
                rdev->config.si.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.si.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.si.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.si.tile_config |= (3 << 0);
                break;
        }
        switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
        case 0: /* four banks */
                rdev->config.si.tile_config |= 0 << 4;
                break;
        case 1: /* eight banks */
                rdev->config.si.tile_config |= 1 << 4;
                break;
        case 2: /* sixteen banks */
        default:
                rdev->config.si.tile_config |= 2 << 4;
                break;
        }
        rdev->config.si.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.si.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* broadcast the addressing configuration to every block that
         * needs to agree on the memory layout */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
        if (rdev->has_uvd) {
                WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
                WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
        }

        si_tiling_mode_table_init(rdev);

        si_setup_rb(rdev, rdev->config.si.max_shader_engines,
                    rdev->config.si.max_sh_per_se,
                    rdev->config.si.max_backends_per_se);

        si_setup_spi(rdev, rdev->config.si.max_shader_engines,
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);


        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
                                     ROQ_IB2_START(0x2b)));
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        /* read-modify-write with no modification: re-latches the
         * register value */
        sx_debug_1 = RREG32(SX_DEBUG_1);
        WREG32(SX_DEBUG_1, sx_debug_1);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* scan-converter FIFO sizes chosen per-ASIC above */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* disable the CB performance counters */
        WREG32(CB_PERFCOUNTER0_SELECT0, 0);
        WREG32(CB_PERFCOUNTER0_SELECT1, 0);
        WREG32(CB_PERFCOUNTER1_SELECT0, 0);
        WREG32(CB_PERFCOUNTER1_SELECT1, 0);
        WREG32(CB_PERFCOUNTER2_SELECT0, 0);
        WREG32(CB_PERFCOUNTER2_SELECT1, 0);
        WREG32(CB_PERFCOUNTER3_SELECT0, 0);
        WREG32(CB_PERFCOUNTER3_SELECT1, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* read-modify-write with no modification (re-latch) */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

        /* let the configuration settle before continuing */
        udelay(50);
}
2842
2843 /*
2844  * GPU scratch registers helpers function.
2845  */
2846 static void si_scratch_init(struct radeon_device *rdev)
2847 {
2848         int i;
2849
2850         rdev->scratch.num_reg = 7;
2851         rdev->scratch.reg_base = SCRATCH_REG0;
2852         for (i = 0; i < rdev->scratch.num_reg; i++) {
2853                 rdev->scratch.free[i] = true;
2854                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2855         }
2856 }
2857
/* Emit a fence on a gfx ring: flush the read caches over GART, then
 * use an EVENT_WRITE_EOP packet to write the fence sequence number to
 * the fence address and raise an interrupt once the pipe has drained.
 * Emits a fixed number of dwords; callers must have reserved space. */
void si_fence_ring_emit(struct radeon_device *rdev,
                        struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* flush read cache over gart */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                          PACKET3_TC_ACTION_ENA |
                          PACKET3_SH_KCACHE_ACTION_ENA |
                          PACKET3_SH_ICACHE_ACTION_ENA);
        /* same flush payload as the IB-execute path below */
        radeon_ring_write(ring, 0xFFFFFFFF);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
        radeon_ring_write(ring, addr & 0xffffffff); /* fence address, low 32 bits */
        /* high address bits plus data-select/interrupt-select controls */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
2884
2885 /*
2886  * IB stuff
2887  */
/* Schedule an indirect buffer on a CP ring.
 *
 * Const IBs get a SWITCH_BUFFER preamble and use the CONST variant of
 * the INDIRECT_BUFFER packet; regular IBs additionally record the
 * post-IB read pointer (via scratch reg or writeback) and flush the
 * read caches for the IB's VM id afterwards.  The next_rptr offsets
 * (3/5 + 4 + 8) count the dwords emitted after the wptr snapshot and
 * must stay in sync with the packets below. */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 header;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* save the predicted rptr into a scratch register */
                        next_rptr = ring->wptr + 3 + 4 + 8;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_CONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* otherwise write it to the writeback buffer */
                        next_rptr = ring->wptr + 5 + 4 + 8;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, (1 << 8)); /* WRITE_DATA control word */
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        radeon_ring_write(ring, header);
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) | /* request byte-swapped IB fetch */
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        /* IB size in dwords plus the VM id in bits 31:24 */
        radeon_ring_write(ring, ib->length_dw |
                          (ib->vm ? (ib->vm->id << 24) : 0));

        if (!ib->is_const_ib) {
                /* flush read cache over gart for this vmid */
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
                radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
                radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
                radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                                  PACKET3_TC_ACTION_ENA |
                                  PACKET3_SH_KCACHE_ACTION_ENA |
                                  PACKET3_SH_ICACHE_ACTION_ENA);
                radeon_ring_write(ring, 0xFFFFFFFF);
                radeon_ring_write(ring, 0);
                radeon_ring_write(ring, 10); /* poll interval */
        }
}
2944
2945 /*
2946  * CP.
2947  */
2948 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2949 {
2950         if (enable)
2951                 WREG32(CP_ME_CNTL, 0);
2952         else {
2953                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2954                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2955                 WREG32(SCRATCH_UMSK, 0);
2956                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2957                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2958                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2959         }
2960         udelay(50);
2961 }
2962
2963 static int si_cp_load_microcode(struct radeon_device *rdev)
2964 {
2965         const __be32 *fw_data;
2966         int i;
2967
2968         if (!rdev->me_fw || !rdev->pfp_fw)
2969                 return -EINVAL;
2970
2971         si_cp_enable(rdev, false);
2972
2973         /* PFP */
2974         fw_data = (const __be32 *)rdev->pfp_fw->data;
2975         WREG32(CP_PFP_UCODE_ADDR, 0);
2976         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2977                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2978         WREG32(CP_PFP_UCODE_ADDR, 0);
2979
2980         /* CE */
2981         fw_data = (const __be32 *)rdev->ce_fw->data;
2982         WREG32(CP_CE_UCODE_ADDR, 0);
2983         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2984                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2985         WREG32(CP_CE_UCODE_ADDR, 0);
2986
2987         /* ME */
2988         fw_data = (const __be32 *)rdev->me_fw->data;
2989         WREG32(CP_ME_RAM_WADDR, 0);
2990         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2991                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2992         WREG32(CP_ME_RAM_WADDR, 0);
2993
2994         WREG32(CP_PFP_UCODE_ADDR, 0);
2995         WREG32(CP_CE_UCODE_ADDR, 0);
2996         WREG32(CP_ME_RAM_WADDR, 0);
2997         WREG32(CP_ME_RAM_RADDR, 0);
2998         return 0;
2999 }
3000
3001 static int si_cp_start(struct radeon_device *rdev)
3002 {
3003         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3004         int r, i;
3005
3006         r = radeon_ring_lock(rdev, ring, 7 + 4);
3007         if (r) {
3008                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3009                 return r;
3010         }
3011         /* init the CP */
3012         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3013         radeon_ring_write(ring, 0x1);
3014         radeon_ring_write(ring, 0x0);
3015         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3016         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3017         radeon_ring_write(ring, 0);
3018         radeon_ring_write(ring, 0);
3019
3020         /* init the CE partitions */
3021         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3022         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3023         radeon_ring_write(ring, 0xc000);
3024         radeon_ring_write(ring, 0xe000);
3025         radeon_ring_unlock_commit(rdev, ring);
3026
3027         si_cp_enable(rdev, true);
3028
3029         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3030         if (r) {
3031                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3032                 return r;
3033         }
3034
3035         /* setup clear context state */
3036         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3037         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3038
3039         for (i = 0; i < si_default_size; i++)
3040                 radeon_ring_write(ring, si_default_state[i]);
3041
3042         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3043         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3044
3045         /* set clear context state */
3046         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3047         radeon_ring_write(ring, 0);
3048
3049         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3050         radeon_ring_write(ring, 0x00000316);
3051         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3052         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3053
3054         radeon_ring_unlock_commit(rdev, ring);
3055
3056         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3057                 ring = &rdev->ring[i];
3058                 r = radeon_ring_lock(rdev, ring, 2);
3059
3060                 /* clear the compute context state */
3061                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3062                 radeon_ring_write(ring, 0);
3063
3064                 radeon_ring_unlock_commit(rdev, ring);
3065         }
3066
3067         return 0;
3068 }
3069
3070 static void si_cp_fini(struct radeon_device *rdev)
3071 {
3072         struct radeon_ring *ring;
3073         si_cp_enable(rdev, false);
3074
3075         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3076         radeon_ring_fini(rdev, ring);
3077         radeon_scratch_free(rdev, ring->rptr_save_reg);
3078
3079         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3080         radeon_ring_fini(rdev, ring);
3081         radeon_scratch_free(rdev, ring->rptr_save_reg);
3082
3083         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3084         radeon_ring_fini(rdev, ring);
3085         radeon_scratch_free(rdev, ring->rptr_save_reg);
3086 }
3087
/* Bring the command processor back up: soft-reset the CP and front-end
 * blocks, program the three ring buffers (base, size, rptr writeback
 * address), then start the rings via si_cp_start() and ring-test each
 * one.  Returns 0 on success; a gfx-ring test failure is fatal, while
 * compute-ring failures only mark that ring not ready. */
static int si_cp_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        int r;

        /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
        WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
                                 SOFT_RESET_PA |
                                 SOFT_RESET_VGT |
                                 SOFT_RESET_SPI |
                                 SOFT_RESET_SX));
        RREG32(GRBM_SOFT_RESET); /* read back to post the write */
        mdelay(15);
        WREG32(GRBM_SOFT_RESET, 0);
        RREG32(GRBM_SOFT_RESET);

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        WREG32(CP_DEBUG, 0);
        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        if (rdev->wb.enabled)
                WREG32(SCRATCH_UMSK, 0xff);
        else {
                tmp |= RB_NO_UPDATE; /* no writeback: CP must not update rptr in memory */
                WREG32(SCRATCH_UMSK, 0);
        }

        mdelay(1);
        WREG32(CP_RB0_CNTL, tmp); /* clears RB_RPTR_WR_ENA again */

        WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB0_RPTR);

        /* ring1  - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB1_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB1_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

        mdelay(1);
        WREG32(CP_RB1_CNTL, tmp);

        WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB1_RPTR);

        /* ring2 - compute only */
        /* Set ring buffer size */
        ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
        rb_bufsz = drm_order(ring->ring_size / 8);
        tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB2_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB2_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

        mdelay(1);
        WREG32(CP_RB2_CNTL, tmp);

        WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

        ring->rptr = RREG32(CP_RB2_RPTR);

        /* start the rings */
        si_cp_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
        rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                /* gfx ring failure takes everything down */
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
                return r;
        }
        r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
        if (r) {
                rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
        }
        r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
        if (r) {
                rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
        }

        return 0;
}
3223
/* Inspect the GRBM/SRBM/DMA/VM status registers and build a
 * RADEON_RESET_* mask describing which blocks appear hung and would
 * need a soft reset.  A busy MC is deliberately excluded at the end
 * since it is most likely just busy rather than hung. */
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
                   CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        if (tmp & GRBM_EE_BUSY)
                reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
                reset_mask |= RADEON_RESET_RLC;

        /* DMA_STATUS_REG 0 */
        tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* DMA_STATUS_REG 1 */
        tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
        if (!(tmp & DMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & DMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & DMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* VM_L2_STATUS */
        tmp = RREG32(VM_L2_STATUS);
        if (tmp & L2_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
3304
/**
 * si_gpu_soft_reset - soft reset the engines selected in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* flags naming the blocks to reset
 *
 * Dumps fault state, halts the CP (and any requested DMA ring), stops
 * the memory controller, translates @reset_mask into GRBM/SRBM
 * soft-reset bits, pulses those bits, and restores the saved MC state.
 * A zero mask is a no-op.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* log VM fault information before it is lost to the reset */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* map the requested engines onto GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert the reset bits, wait, then deassert; the register is
		 * read back after each write before continuing */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/deassert dance for the SRBM-side blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3429
3430 int si_asic_reset(struct radeon_device *rdev)
3431 {
3432         u32 reset_mask;
3433
3434         reset_mask = si_gpu_check_soft_reset(rdev);
3435
3436         if (reset_mask)
3437                 r600_set_bios_scratch_engine_hung(rdev, true);
3438
3439         si_gpu_soft_reset(rdev, reset_mask);
3440
3441         reset_mask = si_gpu_check_soft_reset(rdev);
3442
3443         if (!reset_mask)
3444                 r600_set_bios_scratch_engine_hung(rdev, false);
3445
3446         return 0;
3447 }
3448
3449 /**
3450  * si_gfx_is_lockup - Check if the GFX engine is locked up
3451  *
3452  * @rdev: radeon_device pointer
3453  * @ring: radeon_ring structure holding ring information
3454  *
3455  * Check if the GFX engine is locked up.
3456  * Returns true if the engine appears to be locked up, false if not.
3457  */
3458 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3459 {
3460         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3461
3462         if (!(reset_mask & (RADEON_RESET_GFX |
3463                             RADEON_RESET_COMPUTE |
3464                             RADEON_RESET_CP))) {
3465                 radeon_ring_lockup_update(ring);
3466                 return false;
3467         }
3468         /* force CP activities */
3469         radeon_ring_force_activity(rdev, ring);
3470         return radeon_ring_test_lockup(rdev, ring);
3471 }
3472
3473 /**
3474  * si_dma_is_lockup - Check if the DMA engine is locked up
3475  *
3476  * @rdev: radeon_device pointer
3477  * @ring: radeon_ring structure holding ring information
3478  *
3479  * Check if the async DMA engine is locked up.
3480  * Returns true if the engine appears to be locked up, false if not.
3481  */
3482 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3483 {
3484         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3485         u32 mask;
3486
3487         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3488                 mask = RADEON_RESET_DMA;
3489         else
3490                 mask = RADEON_RESET_DMA1;
3491
3492         if (!(reset_mask & mask)) {
3493                 radeon_ring_lockup_update(ring);
3494                 return false;
3495         }
3496         /* force ring activities */
3497         radeon_ring_force_activity(rdev, ring);
3498         return radeon_ring_test_lockup(rdev, ring);
3499 }
3500
3501 /* MC */
/**
 * si_mc_program - program the memory controller FB location
 *
 * @rdev: radeon_device pointer
 *
 * Stops the MC (and, on parts with display, VGA access to it), programs
 * the system aperture and framebuffer location registers, then restarts
 * the MC.  The register write order here is deliberate; do not reorder.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end address in the high half, start in the low half,
	 * both in 16MB (>> 24) units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture registers; NOTE(review): TOP == BOT here presumably
	 * disables the aperture — confirm against MC register docs */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3552
3553 static void si_vram_gtt_location(struct radeon_device *rdev,
3554                                  struct radeon_mc *mc)
3555 {
3556         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3557                 /* leave room for at least 1024M GTT */
3558                 dev_warn(rdev->dev, "limiting VRAM\n");
3559                 mc->real_vram_size = 0xFFC0000000ULL;
3560                 mc->mc_vram_size = 0xFFC0000000ULL;
3561         }
3562         radeon_vram_location(rdev, &rdev->mc, 0);
3563         rdev->mc.gtt_base_align = 0;
3564         radeon_gtt_location(rdev, mc);
3565 }
3566
3567 static int si_mc_init(struct radeon_device *rdev)
3568 {
3569         u32 tmp;
3570         int chansize, numchan;
3571
3572         /* Get VRAM informations */
3573         rdev->mc.vram_is_ddr = true;
3574         tmp = RREG32(MC_ARB_RAMCFG);
3575         if (tmp & CHANSIZE_OVERRIDE) {
3576                 chansize = 16;
3577         } else if (tmp & CHANSIZE_MASK) {
3578                 chansize = 64;
3579         } else {
3580                 chansize = 32;
3581         }
3582         tmp = RREG32(MC_SHARED_CHMAP);
3583         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3584         case 0:
3585         default:
3586                 numchan = 1;
3587                 break;
3588         case 1:
3589                 numchan = 2;
3590                 break;
3591         case 2:
3592                 numchan = 4;
3593                 break;
3594         case 3:
3595                 numchan = 8;
3596                 break;
3597         case 4:
3598                 numchan = 3;
3599                 break;
3600         case 5:
3601                 numchan = 6;
3602                 break;
3603         case 6:
3604                 numchan = 10;
3605                 break;
3606         case 7:
3607                 numchan = 12;
3608                 break;
3609         case 8:
3610                 numchan = 16;
3611                 break;
3612         }
3613         rdev->mc.vram_width = numchan * chansize;
3614         /* Could aper size report 0 ? */
3615         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3616         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3617         /* size in MB on si */
3618         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3619         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3620         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3621         si_vram_gtt_location(rdev, &rdev->mc);
3622         radeon_update_bandwidth_info(rdev);
3623
3624         return 0;
3625 }
3626
3627 /*
3628  * GART
3629  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then requests a TLB invalidate via
 * VM_INVALIDATE_REQUEST (the value 1 written here sets bit 0, i.e.
 * VM context 0).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
3638
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and VM L2
 * cache, configures VM context 0 to cover the GTT aperture and
 * contexts 1-15 for per-process VMs, flushes the TLB and marks the
 * GART ready.  Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 resolve to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared here without a
	 * named define — purpose not visible from this file */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3723
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, turns off the L1 TLB and the L2 cache
 * enable bit, then unpins the GART table from VRAM.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3742
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, then frees the page table backing store and the
 * GART bookkeeping (teardown order matters: disable before free).
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
3749
3750 /* vm parser */
3751 static bool si_vm_reg_valid(u32 reg)
3752 {
3753         /* context regs are fine */
3754         if (reg >= 0x28000)
3755                 return true;
3756
3757         /* check config regs */
3758         switch (reg) {
3759         case GRBM_GFX_INDEX:
3760         case CP_STRMOUT_CNTL:
3761         case VGT_VTX_VECT_EJECT_REG:
3762         case VGT_CACHE_INVALIDATION:
3763         case VGT_ESGS_RING_SIZE:
3764         case VGT_GSVS_RING_SIZE:
3765         case VGT_GS_VERTEX_REUSE:
3766         case VGT_PRIMITIVE_TYPE:
3767         case VGT_INDEX_TYPE:
3768         case VGT_NUM_INDICES:
3769         case VGT_NUM_INSTANCES:
3770         case VGT_TF_RING_SIZE:
3771         case VGT_HS_OFFCHIP_PARAM:
3772         case VGT_TF_MEMORY_BASE:
3773         case PA_CL_ENHANCE:
3774         case PA_SU_LINE_STIPPLE_VALUE:
3775         case PA_SC_LINE_STIPPLE_STATE:
3776         case PA_SC_ENHANCE:
3777         case SQC_CACHES:
3778         case SPI_STATIC_THREAD_MGMT_1:
3779         case SPI_STATIC_THREAD_MGMT_2:
3780         case SPI_STATIC_THREAD_MGMT_3:
3781         case SPI_PS_MAX_WAVE_ID:
3782         case SPI_CONFIG_CNTL:
3783         case SPI_CONFIG_CNTL_1:
3784         case TA_CNTL_AUX:
3785                 return true;
3786         default:
3787                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3788                 return false;
3789         }
3790 }
3791
3792 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3793                                   u32 *ib, struct radeon_cs_packet *pkt)
3794 {
3795         switch (pkt->opcode) {
3796         case PACKET3_NOP:
3797         case PACKET3_SET_BASE:
3798         case PACKET3_SET_CE_DE_COUNTERS:
3799         case PACKET3_LOAD_CONST_RAM:
3800         case PACKET3_WRITE_CONST_RAM:
3801         case PACKET3_WRITE_CONST_RAM_OFFSET:
3802         case PACKET3_DUMP_CONST_RAM:
3803         case PACKET3_INCREMENT_CE_COUNTER:
3804         case PACKET3_WAIT_ON_DE_COUNTER:
3805         case PACKET3_CE_WRITE:
3806                 break;
3807         default:
3808                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3809                 return -EINVAL;
3810         }
3811         return 0;
3812 }
3813
3814 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
3815 {
3816         u32 start_reg, reg, i;
3817         u32 command = ib[idx + 4];
3818         u32 info = ib[idx + 1];
3819         u32 idx_value = ib[idx];
3820         if (command & PACKET3_CP_DMA_CMD_SAS) {
3821                 /* src address space is register */
3822                 if (((info & 0x60000000) >> 29) == 0) {
3823                         start_reg = idx_value << 2;
3824                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
3825                                 reg = start_reg;
3826                                 if (!si_vm_reg_valid(reg)) {
3827                                         DRM_ERROR("CP DMA Bad SRC register\n");
3828                                         return -EINVAL;
3829                                 }
3830                         } else {
3831                                 for (i = 0; i < (command & 0x1fffff); i++) {
3832                                         reg = start_reg + (4 * i);
3833                                         if (!si_vm_reg_valid(reg)) {
3834                                                 DRM_ERROR("CP DMA Bad SRC register\n");
3835                                                 return -EINVAL;
3836                                         }
3837                                 }
3838                         }
3839                 }
3840         }
3841         if (command & PACKET3_CP_DMA_CMD_DAS) {
3842                 /* dst address space is register */
3843                 if (((info & 0x00300000) >> 20) == 0) {
3844                         start_reg = ib[idx + 2];
3845                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
3846                                 reg = start_reg;
3847                                 if (!si_vm_reg_valid(reg)) {
3848                                         DRM_ERROR("CP DMA Bad DST register\n");
3849                                         return -EINVAL;
3850                                 }
3851                         } else {
3852                                 for (i = 0; i < (command & 0x1fffff); i++) {
3853                                         reg = start_reg + (4 * i);
3854                                 if (!si_vm_reg_valid(reg)) {
3855                                                 DRM_ERROR("CP DMA Bad DST register\n");
3856                                                 return -EINVAL;
3857                                         }
3858                                 }
3859                         }
3860                 }
3861         }
3862         return 0;
3863 }
3864
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring of a VM IB
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Opcodes that cannot write registers are allowed unconditionally; for
 * opcodes that can, every target register is checked with
 * si_vm_reg_valid().  Returns 0 if the packet is acceptable, -EINVAL
 * otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register-write side effects: always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel bits == 0: destination is a register at dword 3 */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel bits == 0: destination is a register range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: write the same register repeatedly */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write target is a register at dword 5 */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: destination is a register at dword 3 */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* validate the whole contiguous register range */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
3982
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring of a VM IB
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Same scheme as si_vm_packet3_gfx_check() with the compute-legal
 * opcode set (no draw packets).  Returns 0 if the packet is
 * acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register-write side effects: always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel bits == 0: destination is a register at dword 3 */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel bits == 0: destination is a register range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16: write the same register repeatedly */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8: write target is a register at dword 5 */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1: destination is a register at dword 3 */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4070
4071 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4072 {
4073         int ret = 0;
4074         u32 idx = 0;
4075         struct radeon_cs_packet pkt;
4076
4077         do {
4078                 pkt.idx = idx;
4079                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4080                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4081                 pkt.one_reg_wr = 0;
4082                 switch (pkt.type) {
4083                 case RADEON_PACKET_TYPE0:
4084                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4085                         ret = -EINVAL;
4086                         break;
4087                 case RADEON_PACKET_TYPE2:
4088                         idx += 1;
4089                         break;
4090                 case RADEON_PACKET_TYPE3:
4091                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4092                         if (ib->is_const_ib)
4093                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4094                         else {
4095                                 switch (ib->ring) {
4096                                 case RADEON_RING_TYPE_GFX_INDEX:
4097                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4098                                         break;
4099                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4100                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4101                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4102                                         break;
4103                                 default:
4104                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4105                                         ret = -EINVAL;
4106                                         break;
4107                                 }
4108                         }
4109                         idx += pkt.count + 2;
4110                         break;
4111                 default:
4112                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4113                         ret = -EINVAL;
4114                         break;
4115                 }
4116                 if (ret)
4117                         break;
4118         } while (idx < ib->length_dw);
4119
4120         return ret;
4121 }
4122
4123 /*
4124  * vm
4125  */
4126 int si_vm_init(struct radeon_device *rdev)
4127 {
4128         /* number of VMs */
4129         rdev->vm_manager.nvm = 16;
4130         /* base offset of vram pages */
4131         rdev->vm_manager.vram_base_offset = 0;
4132
4133         return 0;
4134 }
4135
/* si_vm_fini - VM manager teardown; nothing to release on SI */
void si_vm_fini(struct radeon_device *rdev)
{
}
4139
4140 /**
4141  * si_vm_set_page - update the page tables using the CP
4142  *
4143  * @rdev: radeon_device pointer
4144  * @ib: indirect buffer to fill with commands
4145  * @pe: addr of the page entry
4146  * @addr: dst addr to write into pe
4147  * @count: number of page entries to update
4148  * @incr: increase next addr by incr bytes
4149  * @flags: access flags
4150  *
4151  * Update the page tables using the CP (SI).
4152  */
4153 void si_vm_set_page(struct radeon_device *rdev,
4154                     struct radeon_ib *ib,
4155                     uint64_t pe,
4156                     uint64_t addr, unsigned count,
4157                     uint32_t incr, uint32_t flags)
4158 {
4159         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4160         uint64_t value;
4161         unsigned ndw;
4162
4163         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4164                 while (count) {
4165                         ndw = 2 + count * 2;
4166                         if (ndw > 0x3FFE)
4167                                 ndw = 0x3FFE;
4168
4169                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4170                         ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4171                                         WRITE_DATA_DST_SEL(1));
4172                         ib->ptr[ib->length_dw++] = pe;
4173                         ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4174                         for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4175                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4176                                         value = radeon_vm_map_gart(rdev, addr);
4177                                         value &= 0xFFFFFFFFFFFFF000ULL;
4178                                 } else if (flags & RADEON_VM_PAGE_VALID) {
4179                                         value = addr;
4180                                 } else {
4181                                         value = 0;
4182                                 }
4183                                 addr += incr;
4184                                 value |= r600_flags;
4185                                 ib->ptr[ib->length_dw++] = value;
4186                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4187                         }
4188                 }
4189         } else {
4190                 /* DMA */
4191                 if (flags & RADEON_VM_PAGE_SYSTEM) {
4192                         while (count) {
4193                                 ndw = count * 2;
4194                                 if (ndw > 0xFFFFE)
4195                                         ndw = 0xFFFFE;
4196
4197                                 /* for non-physically contiguous pages (system) */
4198                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4199                                 ib->ptr[ib->length_dw++] = pe;
4200                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4201                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4202                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
4203                                                 value = radeon_vm_map_gart(rdev, addr);
4204                                                 value &= 0xFFFFFFFFFFFFF000ULL;
4205                                         } else if (flags & RADEON_VM_PAGE_VALID) {
4206                                                 value = addr;
4207                                         } else {
4208                                                 value = 0;
4209                                         }
4210                                         addr += incr;
4211                                         value |= r600_flags;
4212                                         ib->ptr[ib->length_dw++] = value;
4213                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
4214                                 }
4215                         }
4216                 } else {
4217                         while (count) {
4218                                 ndw = count * 2;
4219                                 if (ndw > 0xFFFFE)
4220                                         ndw = 0xFFFFE;
4221
4222                                 if (flags & RADEON_VM_PAGE_VALID)
4223                                         value = addr;
4224                                 else
4225                                         value = 0;
4226                                 /* for physically contiguous pages (vram) */
4227                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4228                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4229                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4230                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4231                                 ib->ptr[ib->length_dw++] = 0;
4232                                 ib->ptr[ib->length_dw++] = value; /* value */
4233                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4234                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
4235                                 ib->ptr[ib->length_dw++] = 0;
4236                                 pe += ndw * 4;
4237                                 addr += (ndw / 2) * incr;
4238                                 count -= ndw / 2;
4239                         }
4240                 }
4241                 while (ib->length_dw & 0x7)
4242                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4243         }
4244 }
4245
/**
 * si_vm_flush - emit a VM page-table flush on a CP ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets that (1) point the per-VM page-directory
 * base register at vm->pd_gpu_addr, (2) flush the HDP cache and
 * (3) request TLB invalidation for this VM id, then syncs PFP to ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* ids 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4288
/**
 * si_dma_vm_flush - emit a VM page-table flush on a DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Same sequence as si_vm_flush() but issued via SRBM_WRITE packets on
 * the async DMA engine: update the page-directory base, flush the HDP
 * cache and invalidate the TLB for this VM id.
 */
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	/* ids 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-15 are the VM contexts0-15 (si_vm_init sets nvm = 16) */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
4314
4315 /*
4316  * RLC
4317  */
4318 void si_rlc_fini(struct radeon_device *rdev)
4319 {
4320         int r;
4321
4322         /* save restore block */
4323         if (rdev->rlc.save_restore_obj) {
4324                 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4325                 if (unlikely(r != 0))
4326                         dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4327                 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4328                 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4329
4330                 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4331                 rdev->rlc.save_restore_obj = NULL;
4332         }
4333
4334         /* clear state block */
4335         if (rdev->rlc.clear_state_obj) {
4336                 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4337                 if (unlikely(r != 0))
4338                         dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4339                 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4340                 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4341
4342                 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4343                 rdev->rlc.clear_state_obj = NULL;
4344         }
4345 }
4346
4347 int si_rlc_init(struct radeon_device *rdev)
4348 {
4349         int r;
4350
4351         /* save restore block */
4352         if (rdev->rlc.save_restore_obj == NULL) {
4353                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4354                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4355                                      &rdev->rlc.save_restore_obj);
4356                 if (r) {
4357                         dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4358                         return r;
4359                 }
4360         }
4361
4362         r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4363         if (unlikely(r != 0)) {
4364                 si_rlc_fini(rdev);
4365                 return r;
4366         }
4367         r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4368                           &rdev->rlc.save_restore_gpu_addr);
4369         radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4370         if (r) {
4371                 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4372                 si_rlc_fini(rdev);
4373                 return r;
4374         }
4375
4376         /* clear state block */
4377         if (rdev->rlc.clear_state_obj == NULL) {
4378                 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4379                                      RADEON_GEM_DOMAIN_VRAM, NULL,
4380                                      &rdev->rlc.clear_state_obj);
4381                 if (r) {
4382                         dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4383                         si_rlc_fini(rdev);
4384                         return r;
4385                 }
4386         }
4387         r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4388         if (unlikely(r != 0)) {
4389                 si_rlc_fini(rdev);
4390                 return r;
4391         }
4392         r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4393                           &rdev->rlc.clear_state_gpu_addr);
4394         radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4395         if (r) {
4396                 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4397                 si_rlc_fini(rdev);
4398                 return r;
4399         }
4400
4401         return 0;
4402 }
4403
/* si_rlc_stop - halt the RLC by clearing its control register */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}
4408
/* si_rlc_start - enable the RLC via its control register */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}
4413
/**
 * si_rlc_resume - reset the RLC and load its microcode
 *
 * @rdev: radeon_device pointer
 *
 * Stops the RLC, programs its base/size/load-balancing registers and
 * the save-restore / clear-state buffer addresses, uploads the RLC
 * ucode word by word from rdev->rlc_fw, then restarts the RLC.
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	/* reset run-list and load-balancing state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	/* point the RLC at the bo addresses set up by si_rlc_init() */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the ucode; firmware words are big-endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
4447
4448 static void si_enable_interrupts(struct radeon_device *rdev)
4449 {
4450         u32 ih_cntl = RREG32(IH_CNTL);
4451         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4452
4453         ih_cntl |= ENABLE_INTR;
4454         ih_rb_cntl |= IH_RB_ENABLE;
4455         WREG32(IH_CNTL, ih_cntl);
4456         WREG32(IH_RB_CNTL, ih_rb_cntl);
4457         rdev->ih.enabled = true;
4458 }
4459
4460 static void si_disable_interrupts(struct radeon_device *rdev)
4461 {
4462         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4463         u32 ih_cntl = RREG32(IH_CNTL);
4464
4465         ih_rb_cntl &= ~IH_RB_ENABLE;
4466         ih_cntl &= ~ENABLE_INTR;
4467         WREG32(IH_RB_CNTL, ih_rb_cntl);
4468         WREG32(IH_CNTL, ih_cntl);
4469         /* set rptr, wptr to 0 */
4470         WREG32(IH_RB_RPTR, 0);
4471         WREG32(IH_RB_WPTR, 0);
4472         rdev->ih.enabled = false;
4473         rdev->ih.rptr = 0;
4474 }
4475
/**
 * si_disable_interrupt_state - mask every interrupt source
 *
 * @rdev: radeon_device pointer
 *
 * Clears the interrupt enables for the CP rings, both DMA engines,
 * GRBM, every populated CRTC (vblank and pageflip) and, on parts with
 * a display engine, the hotplug-detect lines (keeping only the HPD
 * polarity bits).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear TRAP_ENABLE on both async DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline masks, per populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts, per populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* keep the HPD polarity bits, clear the enables */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
4531
/**
 * si_irq_init - set up the interrupt handler ring and enable IRQs
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, resumes the RLC, programs the IH ring
 * buffer registers (including optional writeback), masks every
 * interrupt source, then enables the IH.  Returns 0 on success,
 * errno on failure (the IH ring is freed if the RLC fails to resume).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the dword count */
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
4602
/**
 * si_irq_set - program the interrupt enables from the current irq state
 *
 * @rdev: radeon_device pointer
 *
 * Builds enable masks for the CP rings, both DMA engines, each
 * requested CRTC vblank and each requested hotplug line from
 * rdev->irq, then writes them all to the hardware.  If the IH is not
 * enabled everything is forced off instead.  Returns 0 on success,
 * -EINVAL if no irq handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current HPD state with the enable bits cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts: requested either by drm vblank or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	/* hotplug detect lines */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit all the masks built above to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	return 0;
}
4760
4761 static inline void si_irq_ack(struct radeon_device *rdev)
4762 {
4763         u32 tmp;
4764
4765         if (ASIC_IS_NODCE(rdev))
4766                 return;
4767
4768         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4769         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4770         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4771         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4772         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4773         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4774         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4775         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4776         if (rdev->num_crtc >= 4) {
4777                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4778                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4779         }
4780         if (rdev->num_crtc >= 6) {
4781                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4782                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4783         }
4784
4785         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4786                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4787         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4788                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4789         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4790                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4791         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4792                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4793         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4794                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4795         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4796                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4797
4798         if (rdev->num_crtc >= 4) {
4799                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4800                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4801                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4802                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4803                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4804                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4805                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4806                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4807                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4808                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4809                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4810                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4811         }
4812
4813         if (rdev->num_crtc >= 6) {
4814                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4815                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4816                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4817                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4818                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4819                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4820                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4821                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4822                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4823                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4824                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4825                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4826         }
4827
4828         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4829                 tmp = RREG32(DC_HPD1_INT_CONTROL);
4830                 tmp |= DC_HPDx_INT_ACK;
4831                 WREG32(DC_HPD1_INT_CONTROL, tmp);
4832         }
4833         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4834                 tmp = RREG32(DC_HPD2_INT_CONTROL);
4835                 tmp |= DC_HPDx_INT_ACK;
4836                 WREG32(DC_HPD2_INT_CONTROL, tmp);
4837         }
4838         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4839                 tmp = RREG32(DC_HPD3_INT_CONTROL);
4840                 tmp |= DC_HPDx_INT_ACK;
4841                 WREG32(DC_HPD3_INT_CONTROL, tmp);
4842         }
4843         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4844                 tmp = RREG32(DC_HPD4_INT_CONTROL);
4845                 tmp |= DC_HPDx_INT_ACK;
4846                 WREG32(DC_HPD4_INT_CONTROL, tmp);
4847         }
4848         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4849                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4850                 tmp |= DC_HPDx_INT_ACK;
4851                 WREG32(DC_HPD5_INT_CONTROL, tmp);
4852         }
4853         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4854                 tmp = RREG32(DC_HPD5_INT_CONTROL);
4855                 tmp |= DC_HPDx_INT_ACK;
4856                 WREG32(DC_HPD6_INT_CONTROL, tmp);
4857         }
4858 }
4859
/**
 * si_irq_disable - disable interrupt delivery
 *
 * @rdev: radeon_device pointer
 *
 * Turn off interrupt generation on the hw, then ack anything that was
 * already latched and reset the per-source interrupt state registers
 * so nothing is left pending while interrupts are down.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
4868
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (interrupt/power controller).
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
4874
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Suspend interrupt processing and free the IH (interrupt handler)
 * ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
4880
4881 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4882 {
4883         u32 wptr, tmp;
4884
4885         if (rdev->wb.enabled)
4886                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4887         else
4888                 wptr = RREG32(IH_RB_WPTR);
4889
4890         if (wptr & RB_OVERFLOW) {
4891                 /* When a ring buffer overflow happen start parsing interrupt
4892                  * from the last not overwritten vector (wptr + 16). Hopefully
4893                  * this should allow us to catchup.
4894                  */
4895                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4896                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4897                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4898                 tmp = RREG32(IH_RB_CNTL);
4899                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4900                 WREG32(IH_RB_CNTL, tmp);
4901         }
4902         return (wptr & rdev->ih.ptr_mask);
4903 }
4904
4905 /*        SI IV Ring
4906  * Each IV ring entry is 128 bits:
4907  * [7:0]    - interrupt source id
4908  * [31:8]   - reserved
4909  * [59:32]  - interrupt source data
4910  * [63:60]  - reserved
4911  * [71:64]  - RINGID
4912  * [79:72]  - VMID
4913  * [127:80] - reserved
4914  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring between rptr and wptr and dispatch each 16-byte
 * interrupt vector: display vblank/vline per crtc, HPD hotplug, VM
 * protection faults, CP and DMA ring events.  Hotplug work is deferred
 * to a workqueue.  Returns IRQ_HANDLED if the ring was processed,
 * IRQ_NONE if interrupts are disabled or another thread holds the lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! Each IV entry is 128 bits (16 bytes);
		 * decode source id, source data and ring id per the layout
		 * documented above this function. */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					/* clear cached status so a spurious IV is ignored */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		/* VM CONTEXT1 protection faults: dump the faulting address and
		 * status registers, then reset them. */
		case 146:
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* ring_id selects which CP ring signalled end-of-pipe */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
5221
5222 /**
5223  * si_copy_dma - copy pages using the DMA engine
5224  *
5225  * @rdev: radeon_device pointer
5226  * @src_offset: src GPU address
5227  * @dst_offset: dst GPU address
5228  * @num_gpu_pages: number of GPU pages to xfer
5229  * @fence: radeon fence object
5230  *
5231  * Copy GPU paging using the DMA engine (SI).
5232  * Used by the radeon ttm implementation to move pages if
5233  * registered as the asic copy callback.
5234  */
5235 int si_copy_dma(struct radeon_device *rdev,
5236                 uint64_t src_offset, uint64_t dst_offset,
5237                 unsigned num_gpu_pages,
5238                 struct radeon_fence **fence)
5239 {
5240         struct radeon_semaphore *sem = NULL;
5241         int ring_index = rdev->asic->copy.dma_ring_index;
5242         struct radeon_ring *ring = &rdev->ring[ring_index];
5243         u32 size_in_bytes, cur_size_in_bytes;
5244         int i, num_loops;
5245         int r = 0;
5246
5247         r = radeon_semaphore_create(rdev, &sem);
5248         if (r) {
5249                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5250                 return r;
5251         }
5252
5253         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5254         num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5255         r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5256         if (r) {
5257                 DRM_ERROR("radeon: moving bo (%d).\n", r);
5258                 radeon_semaphore_free(rdev, &sem, NULL);
5259                 return r;
5260         }
5261
5262         if (radeon_fence_need_sync(*fence, ring->idx)) {
5263                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5264                                             ring->idx);
5265                 radeon_fence_note_sync(*fence, ring->idx);
5266         } else {
5267                 radeon_semaphore_free(rdev, &sem, NULL);
5268         }
5269
5270         for (i = 0; i < num_loops; i++) {
5271                 cur_size_in_bytes = size_in_bytes;
5272                 if (cur_size_in_bytes > 0xFFFFF)
5273                         cur_size_in_bytes = 0xFFFFF;
5274                 size_in_bytes -= cur_size_in_bytes;
5275                 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5276                 radeon_ring_write(ring, dst_offset & 0xffffffff);
5277                 radeon_ring_write(ring, src_offset & 0xffffffff);
5278                 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5279                 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5280                 src_offset += cur_size_in_bytes;
5281                 dst_offset += cur_size_in_bytes;
5282         }
5283
5284         r = radeon_fence_emit(rdev, fence, ring->idx);
5285         if (r) {
5286                 radeon_ring_unlock_undo(rdev, ring);
5287                 return r;
5288         }
5289
5290         radeon_ring_unlock_commit(rdev, ring);
5291         radeon_semaphore_free(rdev, &sem, *fence);
5292
5293         return r;
5294 }
5295
5296 /*
5297  * startup/shutdown callbacks
5298  */
5299 static int si_startup(struct radeon_device *rdev)
5300 {
5301         struct radeon_ring *ring;
5302         int r;
5303
5304         si_mc_program(rdev);
5305
5306         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5307             !rdev->rlc_fw || !rdev->mc_fw) {
5308                 r = si_init_microcode(rdev);
5309                 if (r) {
5310                         DRM_ERROR("Failed to load firmware!\n");
5311                         return r;
5312                 }
5313         }
5314
5315         r = si_mc_load_microcode(rdev);
5316         if (r) {
5317                 DRM_ERROR("Failed to load MC firmware!\n");
5318                 return r;
5319         }
5320
5321         r = r600_vram_scratch_init(rdev);
5322         if (r)
5323                 return r;
5324
5325         r = si_pcie_gart_enable(rdev);
5326         if (r)
5327                 return r;
5328         si_gpu_init(rdev);
5329
5330         /* allocate rlc buffers */
5331         r = si_rlc_init(rdev);
5332         if (r) {
5333                 DRM_ERROR("Failed to init rlc BOs!\n");
5334                 return r;
5335         }
5336
5337         /* allocate wb buffer */
5338         r = radeon_wb_init(rdev);
5339         if (r)
5340                 return r;
5341
5342         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5343         if (r) {
5344                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5345                 return r;
5346         }
5347
5348         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5349         if (r) {
5350                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5351                 return r;
5352         }
5353
5354         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5355         if (r) {
5356                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5357                 return r;
5358         }
5359
5360         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5361         if (r) {
5362                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5363                 return r;
5364         }
5365
5366         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5367         if (r) {
5368                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5369                 return r;
5370         }
5371
5372         if (rdev->has_uvd) {
5373                 r = rv770_uvd_resume(rdev);
5374                 if (!r) {
5375                         r = radeon_fence_driver_start_ring(rdev,
5376                                                            R600_RING_TYPE_UVD_INDEX);
5377                         if (r)
5378                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5379                 }
5380                 if (r)
5381                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5382         }
5383
5384         /* Enable IRQ */
5385         if (!rdev->irq.installed) {
5386                 r = radeon_irq_kms_init(rdev);
5387                 if (r)
5388                         return r;
5389         }
5390
5391         r = si_irq_init(rdev);
5392         if (r) {
5393                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5394                 radeon_irq_kms_fini(rdev);
5395                 return r;
5396         }
5397         si_irq_set(rdev);
5398
5399         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5400         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5401                              CP_RB0_RPTR, CP_RB0_WPTR,
5402                              0, 0xfffff, RADEON_CP_PACKET2);
5403         if (r)
5404                 return r;
5405
5406         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5407         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5408                              CP_RB1_RPTR, CP_RB1_WPTR,
5409                              0, 0xfffff, RADEON_CP_PACKET2);
5410         if (r)
5411                 return r;
5412
5413         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5414         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5415                              CP_RB2_RPTR, CP_RB2_WPTR,
5416                              0, 0xfffff, RADEON_CP_PACKET2);
5417         if (r)
5418                 return r;
5419
5420         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5421         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5422                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
5423                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
5424                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5425         if (r)
5426                 return r;
5427
5428         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5429         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5430                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
5431                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
5432                              2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5433         if (r)
5434                 return r;
5435
5436         r = si_cp_load_microcode(rdev);
5437         if (r)
5438                 return r;
5439         r = si_cp_resume(rdev);
5440         if (r)
5441                 return r;
5442
5443         r = cayman_dma_resume(rdev);
5444         if (r)
5445                 return r;
5446
5447         if (rdev->has_uvd) {
5448                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5449                 if (ring->ring_size) {
5450                         r = radeon_ring_init(rdev, ring, ring->ring_size,
5451                                              R600_WB_UVD_RPTR_OFFSET,
5452                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5453                                              0, 0xfffff, RADEON_CP_PACKET2);
5454                         if (!r)
5455                                 r = r600_uvd_init(rdev);
5456                         if (r)
5457                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5458                 }
5459         }
5460
5461         r = radeon_ib_pool_init(rdev);
5462         if (r) {
5463                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5464                 return r;
5465         }
5466
5467         r = radeon_vm_manager_init(rdev);
5468         if (r) {
5469                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5470                 return r;
5471         }
5472
5473         return 0;
5474 }
5475
/**
 * si_resume - resume the asic after a suspend cycle
 * @rdev: radeon_device pointer
 *
 * Re-posts the card through the ATOM BIOS, reprograms the golden
 * registers and re-runs the full si_startup() bring-up sequence.
 * Returns 0 on success, negative error code if si_startup() fails
 * (in which case acceleration is left disabled).
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	/* optimistically mark accel working; cleared below on failure */
	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	/* r is 0 here */
	return r;

}
5501
/**
 * si_suspend - shut the asic down for suspend
 * @rdev: radeon_device pointer
 *
 * Tears acceleration down in roughly the reverse order of si_startup():
 * VM manager, CP, DMA engines, UVD (when present), interrupts,
 * writeback, and finally the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	/* halt the command processor rings */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
5516
5517 /* Plan is to move initialization in that function and use
5518  * helper function so that radeon_device_init pretty much
5519  * do nothing more than calling asic specific function. This
5520  * should also allow to remove a bunch of callback function
5521  * like vram_info.
5522  */
5523 int si_init(struct radeon_device *rdev)
5524 {
5525         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5526         int r;
5527
5528         /* Read BIOS */
5529         if (!radeon_get_bios(rdev)) {
5530                 if (ASIC_IS_AVIVO(rdev))
5531                         return -EINVAL;
5532         }
5533         /* Must be an ATOMBIOS */
5534         if (!rdev->is_atom_bios) {
5535                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5536                 return -EINVAL;
5537         }
5538         r = radeon_atombios_init(rdev);
5539         if (r)
5540                 return r;
5541
5542         /* Post card if necessary */
5543         if (!radeon_card_posted(rdev)) {
5544                 if (!rdev->bios) {
5545                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5546                         return -EINVAL;
5547                 }
5548                 DRM_INFO("GPU not posted. posting now...\n");
5549                 atom_asic_init(rdev->mode_info.atom_context);
5550         }
5551         /* init golden registers */
5552         si_init_golden_registers(rdev);
5553         /* Initialize scratch registers */
5554         si_scratch_init(rdev);
5555         /* Initialize surface registers */
5556         radeon_surface_init(rdev);
5557         /* Initialize clocks */
5558         radeon_get_clock_info(rdev->ddev);
5559
5560         /* Fence driver */
5561         r = radeon_fence_driver_init(rdev);
5562         if (r)
5563                 return r;
5564
5565         /* initialize memory controller */
5566         r = si_mc_init(rdev);
5567         if (r)
5568                 return r;
5569         /* Memory manager */
5570         r = radeon_bo_init(rdev);
5571         if (r)
5572                 return r;
5573
5574         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5575         ring->ring_obj = NULL;
5576         r600_ring_init(rdev, ring, 1024 * 1024);
5577
5578         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5579         ring->ring_obj = NULL;
5580         r600_ring_init(rdev, ring, 1024 * 1024);
5581
5582         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5583         ring->ring_obj = NULL;
5584         r600_ring_init(rdev, ring, 1024 * 1024);
5585
5586         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5587         ring->ring_obj = NULL;
5588         r600_ring_init(rdev, ring, 64 * 1024);
5589
5590         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5591         ring->ring_obj = NULL;
5592         r600_ring_init(rdev, ring, 64 * 1024);
5593
5594         if (rdev->has_uvd) {
5595                 r = radeon_uvd_init(rdev);
5596                 if (!r) {
5597                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5598                         ring->ring_obj = NULL;
5599                         r600_ring_init(rdev, ring, 4096);
5600                 }
5601         }
5602
5603         rdev->ih.ring_obj = NULL;
5604         r600_ih_ring_init(rdev, 64 * 1024);
5605
5606         r = r600_pcie_gart_init(rdev);
5607         if (r)
5608                 return r;
5609
5610         rdev->accel_working = true;
5611         r = si_startup(rdev);
5612         if (r) {
5613                 dev_err(rdev->dev, "disabling GPU acceleration\n");
5614                 si_cp_fini(rdev);
5615                 cayman_dma_fini(rdev);
5616                 si_irq_fini(rdev);
5617                 si_rlc_fini(rdev);
5618                 radeon_wb_fini(rdev);
5619                 radeon_ib_pool_fini(rdev);
5620                 radeon_vm_manager_fini(rdev);
5621                 radeon_irq_kms_fini(rdev);
5622                 si_pcie_gart_fini(rdev);
5623                 rdev->accel_working = false;
5624         }
5625
5626         /* Don't start up if the MC ucode is missing.
5627          * The default clocks and voltages before the MC ucode
5628          * is loaded are not suffient for advanced operations.
5629          */
5630         if (!rdev->mc_fw) {
5631                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5632                 return -EINVAL;
5633         }
5634
5635         return 0;
5636 }
5637
/**
 * si_fini - asic specific driver and hw teardown
 * @rdev: radeon_device pointer
 *
 * Final teardown at driver unload: stops the CP/DMA engines and RLC,
 * releases writeback, VM manager, IB pool and IRQ state, shuts down
 * UVD when present, then frees GART, VRAM scratch, GEM, fences,
 * buffer objects, atombios state and the cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		r600_uvd_stop(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy cached at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
5661
/**
 * si_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (SI).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* serialize the capture-then-read sequence; a concurrent
	 * capture would clobber the latched LSB/MSB pair */
	mutex_lock(&rdev->gpu_clock_mutex);
	/* writing the capture register latches the counter */
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
5681
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (0 = put the PLL to sleep)
 * @dclk: requested UVD decode clock (0 = put the PLL to sleep)
 *
 * Bypasses VCLK/DCLK onto BCLK, reprograms the UPLL dividers for the
 * requested frequencies and then switches the clocks back onto the
 * PLL outputs.  The register-write order and the mdelay() settle
 * times below follow the required hardware sequence — do not reorder.
 * Returns 0 on success, negative error code on divider calculation
 * or PLL control-request failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* select the spare bit based on the feedback divider range */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}