drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
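/*
 * Firmware images requested at runtime for each SI ASIC: PFP/ME/CE
 * command-processor microcode, MC (memory controller) microcode, RLC
 * microcode and SMC (power management) firmware.
 */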
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
82                                          bool enable);
83 static void si_fini_pg(struct radeon_device *rdev);
84 static void si_fini_cg(struct radeon_device *rdev);
85 static void si_rlc_stop(struct radeon_device *rdev);
86
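/*
 * RLC save/restore register list: pairs of a control word,
 * ((select) << 16) | (register byte offset >> 2), each followed by a
 * data placeholder dword.  The list is handed to the RLC through
 * rdev->rlc.reg_list (see sumo_rlc_init()).
 */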
87 static const u32 verde_rlc_save_restore_register_list[] =
88 {
89         (0x8000 << 16) | (0x98f4 >> 2),
90         0x00000000,
91         (0x8040 << 16) | (0x98f4 >> 2),
92         0x00000000,
93         (0x8000 << 16) | (0xe80 >> 2),
94         0x00000000,
95         (0x8040 << 16) | (0xe80 >> 2),
96         0x00000000,
97         (0x8000 << 16) | (0x89bc >> 2),
98         0x00000000,
99         (0x8040 << 16) | (0x89bc >> 2),
100         0x00000000,
101         (0x8000 << 16) | (0x8c1c >> 2),
102         0x00000000,
103         (0x8040 << 16) | (0x8c1c >> 2),
104         0x00000000,
105         (0x9c00 << 16) | (0x98f0 >> 2),
106         0x00000000,
107         (0x9c00 << 16) | (0xe7c >> 2),
108         0x00000000,
109         (0x8000 << 16) | (0x9148 >> 2),
110         0x00000000,
111         (0x8040 << 16) | (0x9148 >> 2),
112         0x00000000,
113         (0x9c00 << 16) | (0x9150 >> 2),
114         0x00000000,
115         (0x9c00 << 16) | (0x897c >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x8d8c >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0xac54 >> 2),
120         0x00000000,
121         0x3,
122         (0x9c00 << 16) | (0x98f8 >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x9910 >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x9914 >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9918 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x991c >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x9920 >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x9924 >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9928 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x992c >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x9930 >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x9934 >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9938 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x993c >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x9940 >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x9944 >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9948 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x994c >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x9950 >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x9954 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9958 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x995c >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x9960 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9964 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9968 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x996c >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9970 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9974 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9978 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x997c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9980 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9984 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9988 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x998c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x8c00 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x8c14 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x8c04 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x8c08 >> 2),
195         0x00000000,
196         (0x8000 << 16) | (0x9b7c >> 2),
197         0x00000000,
198         (0x8040 << 16) | (0x9b7c >> 2),
199         0x00000000,
200         (0x8000 << 16) | (0xe84 >> 2),
201         0x00000000,
202         (0x8040 << 16) | (0xe84 >> 2),
203         0x00000000,
204         (0x8000 << 16) | (0x89c0 >> 2),
205         0x00000000,
206         (0x8040 << 16) | (0x89c0 >> 2),
207         0x00000000,
208         (0x8000 << 16) | (0x914c >> 2),
209         0x00000000,
210         (0x8040 << 16) | (0x914c >> 2),
211         0x00000000,
212         (0x8000 << 16) | (0x8c20 >> 2),
213         0x00000000,
214         (0x8040 << 16) | (0x8c20 >> 2),
215         0x00000000,
216         (0x8000 << 16) | (0x9354 >> 2),
217         0x00000000,
218         (0x8040 << 16) | (0x9354 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9060 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9364 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9100 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x913c >> 2),
227         0x00000000,
228         (0x8000 << 16) | (0x90e0 >> 2),
229         0x00000000,
230         (0x8000 << 16) | (0x90e4 >> 2),
231         0x00000000,
232         (0x8000 << 16) | (0x90e8 >> 2),
233         0x00000000,
234         (0x8040 << 16) | (0x90e0 >> 2),
235         0x00000000,
236         (0x8040 << 16) | (0x90e4 >> 2),
237         0x00000000,
238         (0x8040 << 16) | (0x90e8 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8bcc >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8b24 >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x88c4 >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x8e50 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x8c0c >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x8e58 >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x8e5c >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0x9508 >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0x950c >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0x9494 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0xac0c >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0xac10 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0xac14 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0xae00 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0xac08 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x88d4 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x88c8 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x88cc >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x89b0 >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x8b10 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x8a14 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x9830 >> 2),
283         0x00000000,
284         (0x9c00 << 16) | (0x9834 >> 2),
285         0x00000000,
286         (0x9c00 << 16) | (0x9838 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x9a10 >> 2),
289         0x00000000,
290         (0x8000 << 16) | (0x9870 >> 2),
291         0x00000000,
292         (0x8000 << 16) | (0x9874 >> 2),
293         0x00000000,
294         (0x8001 << 16) | (0x9870 >> 2),
295         0x00000000,
296         (0x8001 << 16) | (0x9874 >> 2),
297         0x00000000,
298         (0x8040 << 16) | (0x9870 >> 2),
299         0x00000000,
300         (0x8040 << 16) | (0x9874 >> 2),
301         0x00000000,
302         (0x8041 << 16) | (0x9870 >> 2),
303         0x00000000,
304         (0x8041 << 16) | (0x9874 >> 2),
305         0x00000000,
306         0x00000000
307 };
308
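/*
 * Per-ASIC "golden" register tables.  Each table is a flat array of
 * {register offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence() from si_init_golden_registers():
 * if the mask is 0xffffffff the value is written directly, otherwise
 * the register is read-modify-written.
 */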
309 static const u32 tahiti_golden_rlc_registers[] =
310 {
311         0xc424, 0xffffffff, 0x00601005,
312         0xc47c, 0xffffffff, 0x10104040,
313         0xc488, 0xffffffff, 0x0100000a,
314         0xc314, 0xffffffff, 0x00000800,
315         0xc30c, 0xffffffff, 0x800000f4,
316         0xf4a8, 0xffffffff, 0x00000000
317 };
318
319 static const u32 tahiti_golden_registers[] =
320 {
321         0x9a10, 0x00010000, 0x00018208,
322         0x9830, 0xffffffff, 0x00000000,
323         0x9834, 0xf00fffff, 0x00000400,
324         0x9838, 0x0002021c, 0x00020200,
325         0xc78, 0x00000080, 0x00000000,
326         0xd030, 0x000300c0, 0x00800040,
327         0xd830, 0x000300c0, 0x00800040,
328         0x5bb0, 0x000000f0, 0x00000070,
329         0x5bc0, 0x00200000, 0x50100000,
330         0x7030, 0x31000311, 0x00000011,
331         0x277c, 0x00000003, 0x000007ff,
332         0x240c, 0x000007ff, 0x00000000,
333         0x8a14, 0xf000001f, 0x00000007,
334         0x8b24, 0xffffffff, 0x00ffffff,
335         0x8b10, 0x0000ff0f, 0x00000000,
336         0x28a4c, 0x07ffffff, 0x4e000000,
337         0x28350, 0x3f3f3fff, 0x2a00126a,
338         0x30, 0x000000ff, 0x0040,
339         0x34, 0x00000040, 0x00004040,
340         0x9100, 0x07ffffff, 0x03000000,
341         0x8e88, 0x01ff1f3f, 0x00000000,
342         0x8e84, 0x01ff1f3f, 0x00000000,
343         0x9060, 0x0000007f, 0x00000020,
344         0x9508, 0x00010000, 0x00010000,
345         0xac14, 0x00000200, 0x000002fb,
346         0xac10, 0xffffffff, 0x0000543b,
347         0xac0c, 0xffffffff, 0xa9210876,
348         0x88d0, 0xffffffff, 0x000fff40,
349         0x88d4, 0x0000001f, 0x00000010,
350         0x1410, 0x20000000, 0x20fffed8,
351         0x15c0, 0x000c0fc0, 0x000c0400
352 };
353
354 static const u32 tahiti_golden_registers2[] =
355 {
356         0xc64, 0x00000001, 0x00000001
357 };
358
359 static const u32 pitcairn_golden_rlc_registers[] =
360 {
361         0xc424, 0xffffffff, 0x00601004,
362         0xc47c, 0xffffffff, 0x10102020,
363         0xc488, 0xffffffff, 0x01000020,
364         0xc314, 0xffffffff, 0x00000800,
365         0xc30c, 0xffffffff, 0x800000a4
366 };
367
368 static const u32 pitcairn_golden_registers[] =
369 {
370         0x9a10, 0x00010000, 0x00018208,
371         0x9830, 0xffffffff, 0x00000000,
372         0x9834, 0xf00fffff, 0x00000400,
373         0x9838, 0x0002021c, 0x00020200,
374         0xc78, 0x00000080, 0x00000000,
375         0xd030, 0x000300c0, 0x00800040,
376         0xd830, 0x000300c0, 0x00800040,
377         0x5bb0, 0x000000f0, 0x00000070,
378         0x5bc0, 0x00200000, 0x50100000,
379         0x7030, 0x31000311, 0x00000011,
380         0x2ae4, 0x00073ffe, 0x000022a2,
381         0x240c, 0x000007ff, 0x00000000,
382         0x8a14, 0xf000001f, 0x00000007,
383         0x8b24, 0xffffffff, 0x00ffffff,
384         0x8b10, 0x0000ff0f, 0x00000000,
385         0x28a4c, 0x07ffffff, 0x4e000000,
386         0x28350, 0x3f3f3fff, 0x2a00126a,
387         0x30, 0x000000ff, 0x0040,
388         0x34, 0x00000040, 0x00004040,
389         0x9100, 0x07ffffff, 0x03000000,
390         0x9060, 0x0000007f, 0x00000020,
391         0x9508, 0x00010000, 0x00010000,
392         0xac14, 0x000003ff, 0x000000f7,
393         0xac10, 0xffffffff, 0x00000000,
394         0xac0c, 0xffffffff, 0x32761054,
395         0x88d4, 0x0000001f, 0x00000010,
396         0x15c0, 0x000c0fc0, 0x000c0400
397 };
398
399 static const u32 verde_golden_rlc_registers[] =
400 {
401         0xc424, 0xffffffff, 0x033f1005,
402         0xc47c, 0xffffffff, 0x10808020,
403         0xc488, 0xffffffff, 0x00800008,
404         0xc314, 0xffffffff, 0x00001000,
405         0xc30c, 0xffffffff, 0x80010014
406 };
407
408 static const u32 verde_golden_registers[] =
409 {
410         0x9a10, 0x00010000, 0x00018208,
411         0x9830, 0xffffffff, 0x00000000,
412         0x9834, 0xf00fffff, 0x00000400,
413         0x9838, 0x0002021c, 0x00020200,
414         0xc78, 0x00000080, 0x00000000,
415         0xd030, 0x000300c0, 0x00800040,
416         0xd030, 0x000300c0, 0x00800040,
417         0xd830, 0x000300c0, 0x00800040,
418         0xd830, 0x000300c0, 0x00800040,
419         0x5bb0, 0x000000f0, 0x00000070,
420         0x5bc0, 0x00200000, 0x50100000,
421         0x7030, 0x31000311, 0x00000011,
422         0x2ae4, 0x00073ffe, 0x000022a2,
423         0x2ae4, 0x00073ffe, 0x000022a2,
424         0x2ae4, 0x00073ffe, 0x000022a2,
425         0x240c, 0x000007ff, 0x00000000,
426         0x240c, 0x000007ff, 0x00000000,
427         0x240c, 0x000007ff, 0x00000000,
428         0x8a14, 0xf000001f, 0x00000007,
429         0x8a14, 0xf000001f, 0x00000007,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8b24, 0xffffffff, 0x00ffffff,
432         0x8b10, 0x0000ff0f, 0x00000000,
433         0x28a4c, 0x07ffffff, 0x4e000000,
434         0x28350, 0x3f3f3fff, 0x0000124a,
435         0x28350, 0x3f3f3fff, 0x0000124a,
436         0x28350, 0x3f3f3fff, 0x0000124a,
437         0x30, 0x000000ff, 0x0040,
438         0x34, 0x00000040, 0x00004040,
439         0x9100, 0x07ffffff, 0x03000000,
440         0x9100, 0x07ffffff, 0x03000000,
441         0x8e88, 0x01ff1f3f, 0x00000000,
442         0x8e88, 0x01ff1f3f, 0x00000000,
443         0x8e88, 0x01ff1f3f, 0x00000000,
444         0x8e84, 0x01ff1f3f, 0x00000000,
445         0x8e84, 0x01ff1f3f, 0x00000000,
446         0x8e84, 0x01ff1f3f, 0x00000000,
447         0x9060, 0x0000007f, 0x00000020,
448         0x9508, 0x00010000, 0x00010000,
449         0xac14, 0x000003ff, 0x00000003,
450         0xac14, 0x000003ff, 0x00000003,
451         0xac14, 0x000003ff, 0x00000003,
452         0xac10, 0xffffffff, 0x00000000,
453         0xac10, 0xffffffff, 0x00000000,
454         0xac10, 0xffffffff, 0x00000000,
455         0xac0c, 0xffffffff, 0x00001032,
456         0xac0c, 0xffffffff, 0x00001032,
457         0xac0c, 0xffffffff, 0x00001032,
458         0x88d4, 0x0000001f, 0x00000010,
459         0x88d4, 0x0000001f, 0x00000010,
460         0x88d4, 0x0000001f, 0x00000010,
461         0x15c0, 0x000c0fc0, 0x000c0400
462 };
463
464 static const u32 oland_golden_rlc_registers[] =
465 {
466         0xc424, 0xffffffff, 0x00601005,
467         0xc47c, 0xffffffff, 0x10104040,
468         0xc488, 0xffffffff, 0x0100000a,
469         0xc314, 0xffffffff, 0x00000800,
470         0xc30c, 0xffffffff, 0x800000f4
471 };
472
473 static const u32 oland_golden_registers[] =
474 {
475         0x9a10, 0x00010000, 0x00018208,
476         0x9830, 0xffffffff, 0x00000000,
477         0x9834, 0xf00fffff, 0x00000400,
478         0x9838, 0x0002021c, 0x00020200,
479         0xc78, 0x00000080, 0x00000000,
480         0xd030, 0x000300c0, 0x00800040,
481         0xd830, 0x000300c0, 0x00800040,
482         0x5bb0, 0x000000f0, 0x00000070,
483         0x5bc0, 0x00200000, 0x50100000,
484         0x7030, 0x31000311, 0x00000011,
485         0x2ae4, 0x00073ffe, 0x000022a2,
486         0x240c, 0x000007ff, 0x00000000,
487         0x8a14, 0xf000001f, 0x00000007,
488         0x8b24, 0xffffffff, 0x00ffffff,
489         0x8b10, 0x0000ff0f, 0x00000000,
490         0x28a4c, 0x07ffffff, 0x4e000000,
491         0x28350, 0x3f3f3fff, 0x00000082,
492         0x30, 0x000000ff, 0x0040,
493         0x34, 0x00000040, 0x00004040,
494         0x9100, 0x07ffffff, 0x03000000,
495         0x9060, 0x0000007f, 0x00000020,
496         0x9508, 0x00010000, 0x00010000,
497         0xac14, 0x000003ff, 0x000000f3,
498         0xac10, 0xffffffff, 0x00000000,
499         0xac0c, 0xffffffff, 0x00003210,
500         0x88d4, 0x0000001f, 0x00000010,
501         0x15c0, 0x000c0fc0, 0x000c0400
502 };
503
504 static const u32 hainan_golden_registers[] =
505 {
506         0x9a10, 0x00010000, 0x00018208,
507         0x9830, 0xffffffff, 0x00000000,
508         0x9834, 0xf00fffff, 0x00000400,
509         0x9838, 0x0002021c, 0x00020200,
510         0xd0c0, 0xff000fff, 0x00000100,
511         0xd030, 0x000300c0, 0x00800040,
512         0xd8c0, 0xff000fff, 0x00000100,
513         0xd830, 0x000300c0, 0x00800040,
514         0x2ae4, 0x00073ffe, 0x000022a2,
515         0x240c, 0x000007ff, 0x00000000,
516         0x8a14, 0xf000001f, 0x00000007,
517         0x8b24, 0xffffffff, 0x00ffffff,
518         0x8b10, 0x0000ff0f, 0x00000000,
519         0x28a4c, 0x07ffffff, 0x4e000000,
520         0x28350, 0x3f3f3fff, 0x00000000,
521         0x30, 0x000000ff, 0x0040,
522         0x34, 0x00000040, 0x00004040,
523         0x9100, 0x03e00000, 0x03600000,
524         0x9060, 0x0000007f, 0x00000020,
525         0x9508, 0x00010000, 0x00010000,
526         0xac14, 0x000003ff, 0x000000f1,
527         0xac10, 0xffffffff, 0x00000000,
528         0xac0c, 0xffffffff, 0x00003210,
529         0x88d4, 0x0000001f, 0x00000010,
530         0x15c0, 0x000c0fc0, 0x000c0400
531 };
532
533 static const u32 hainan_golden_registers2[] =
534 {
535         0x98f8, 0xffffffff, 0x02010001
536 };
537
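/*
 * Per-ASIC medium-grain/coarse-grain clock gating (MGCG/CGCG) init
 * sequences, in the same {reg, and-mask, or-value} triple layout as the
 * golden register tables above.
 */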
538 static const u32 tahiti_mgcg_cgcg_init[] =
539 {
540         0xc400, 0xffffffff, 0xfffffffc,
541         0x802c, 0xffffffff, 0xe0000000,
542         0x9a60, 0xffffffff, 0x00000100,
543         0x92a4, 0xffffffff, 0x00000100,
544         0xc164, 0xffffffff, 0x00000100,
545         0x9774, 0xffffffff, 0x00000100,
546         0x8984, 0xffffffff, 0x06000100,
547         0x8a18, 0xffffffff, 0x00000100,
548         0x92a0, 0xffffffff, 0x00000100,
549         0xc380, 0xffffffff, 0x00000100,
550         0x8b28, 0xffffffff, 0x00000100,
551         0x9144, 0xffffffff, 0x00000100,
552         0x8d88, 0xffffffff, 0x00000100,
553         0x8d8c, 0xffffffff, 0x00000100,
554         0x9030, 0xffffffff, 0x00000100,
555         0x9034, 0xffffffff, 0x00000100,
556         0x9038, 0xffffffff, 0x00000100,
557         0x903c, 0xffffffff, 0x00000100,
558         0xad80, 0xffffffff, 0x00000100,
559         0xac54, 0xffffffff, 0x00000100,
560         0x897c, 0xffffffff, 0x06000100,
561         0x9868, 0xffffffff, 0x00000100,
562         0x9510, 0xffffffff, 0x00000100,
563         0xaf04, 0xffffffff, 0x00000100,
564         0xae04, 0xffffffff, 0x00000100,
565         0x949c, 0xffffffff, 0x00000100,
566         0x802c, 0xffffffff, 0xe0000000,
567         0x9160, 0xffffffff, 0x00010000,
568         0x9164, 0xffffffff, 0x00030002,
569         0x9168, 0xffffffff, 0x00040007,
570         0x916c, 0xffffffff, 0x00060005,
571         0x9170, 0xffffffff, 0x00090008,
572         0x9174, 0xffffffff, 0x00020001,
573         0x9178, 0xffffffff, 0x00040003,
574         0x917c, 0xffffffff, 0x00000007,
575         0x9180, 0xffffffff, 0x00060005,
576         0x9184, 0xffffffff, 0x00090008,
577         0x9188, 0xffffffff, 0x00030002,
578         0x918c, 0xffffffff, 0x00050004,
579         0x9190, 0xffffffff, 0x00000008,
580         0x9194, 0xffffffff, 0x00070006,
581         0x9198, 0xffffffff, 0x000a0009,
582         0x919c, 0xffffffff, 0x00040003,
583         0x91a0, 0xffffffff, 0x00060005,
584         0x91a4, 0xffffffff, 0x00000009,
585         0x91a8, 0xffffffff, 0x00080007,
586         0x91ac, 0xffffffff, 0x000b000a,
587         0x91b0, 0xffffffff, 0x00050004,
588         0x91b4, 0xffffffff, 0x00070006,
589         0x91b8, 0xffffffff, 0x0008000b,
590         0x91bc, 0xffffffff, 0x000a0009,
591         0x91c0, 0xffffffff, 0x000d000c,
592         0x91c4, 0xffffffff, 0x00060005,
593         0x91c8, 0xffffffff, 0x00080007,
594         0x91cc, 0xffffffff, 0x0000000b,
595         0x91d0, 0xffffffff, 0x000a0009,
596         0x91d4, 0xffffffff, 0x000d000c,
597         0x91d8, 0xffffffff, 0x00070006,
598         0x91dc, 0xffffffff, 0x00090008,
599         0x91e0, 0xffffffff, 0x0000000c,
600         0x91e4, 0xffffffff, 0x000b000a,
601         0x91e8, 0xffffffff, 0x000e000d,
602         0x91ec, 0xffffffff, 0x00080007,
603         0x91f0, 0xffffffff, 0x000a0009,
604         0x91f4, 0xffffffff, 0x0000000d,
605         0x91f8, 0xffffffff, 0x000c000b,
606         0x91fc, 0xffffffff, 0x000f000e,
607         0x9200, 0xffffffff, 0x00090008,
608         0x9204, 0xffffffff, 0x000b000a,
609         0x9208, 0xffffffff, 0x000c000f,
610         0x920c, 0xffffffff, 0x000e000d,
611         0x9210, 0xffffffff, 0x00110010,
612         0x9214, 0xffffffff, 0x000a0009,
613         0x9218, 0xffffffff, 0x000c000b,
614         0x921c, 0xffffffff, 0x0000000f,
615         0x9220, 0xffffffff, 0x000e000d,
616         0x9224, 0xffffffff, 0x00110010,
617         0x9228, 0xffffffff, 0x000b000a,
618         0x922c, 0xffffffff, 0x000d000c,
619         0x9230, 0xffffffff, 0x00000010,
620         0x9234, 0xffffffff, 0x000f000e,
621         0x9238, 0xffffffff, 0x00120011,
622         0x923c, 0xffffffff, 0x000c000b,
623         0x9240, 0xffffffff, 0x000e000d,
624         0x9244, 0xffffffff, 0x00000011,
625         0x9248, 0xffffffff, 0x0010000f,
626         0x924c, 0xffffffff, 0x00130012,
627         0x9250, 0xffffffff, 0x000d000c,
628         0x9254, 0xffffffff, 0x000f000e,
629         0x9258, 0xffffffff, 0x00100013,
630         0x925c, 0xffffffff, 0x00120011,
631         0x9260, 0xffffffff, 0x00150014,
632         0x9264, 0xffffffff, 0x000e000d,
633         0x9268, 0xffffffff, 0x0010000f,
634         0x926c, 0xffffffff, 0x00000013,
635         0x9270, 0xffffffff, 0x00120011,
636         0x9274, 0xffffffff, 0x00150014,
637         0x9278, 0xffffffff, 0x000f000e,
638         0x927c, 0xffffffff, 0x00110010,
639         0x9280, 0xffffffff, 0x00000014,
640         0x9284, 0xffffffff, 0x00130012,
641         0x9288, 0xffffffff, 0x00160015,
642         0x928c, 0xffffffff, 0x0010000f,
643         0x9290, 0xffffffff, 0x00120011,
644         0x9294, 0xffffffff, 0x00000015,
645         0x9298, 0xffffffff, 0x00140013,
646         0x929c, 0xffffffff, 0x00170016,
647         0x9150, 0xffffffff, 0x96940200,
648         0x8708, 0xffffffff, 0x00900100,
649         0xc478, 0xffffffff, 0x00000080,
650         0xc404, 0xffffffff, 0x0020003f,
651         0x30, 0xffffffff, 0x0000001c,
652         0x34, 0x000f0000, 0x000f0000,
653         0x160c, 0xffffffff, 0x00000100,
654         0x1024, 0xffffffff, 0x00000100,
655         0x102c, 0x00000101, 0x00000000,
656         0x20a8, 0xffffffff, 0x00000104,
657         0x264c, 0x000c0000, 0x000c0000,
658         0x2648, 0x000c0000, 0x000c0000,
659         0x55e4, 0xff000fff, 0x00000100,
660         0x55e8, 0x00000001, 0x00000001,
661         0x2f50, 0x00000001, 0x00000001,
662         0x30cc, 0xc0000fff, 0x00000104,
663         0xc1e4, 0x00000001, 0x00000001,
664         0xd0c0, 0xfffffff0, 0x00000100,
665         0xd8c0, 0xfffffff0, 0x00000100
666 };
667
668 static const u32 pitcairn_mgcg_cgcg_init[] =
669 {
670         0xc400, 0xffffffff, 0xfffffffc,
671         0x802c, 0xffffffff, 0xe0000000,
672         0x9a60, 0xffffffff, 0x00000100,
673         0x92a4, 0xffffffff, 0x00000100,
674         0xc164, 0xffffffff, 0x00000100,
675         0x9774, 0xffffffff, 0x00000100,
676         0x8984, 0xffffffff, 0x06000100,
677         0x8a18, 0xffffffff, 0x00000100,
678         0x92a0, 0xffffffff, 0x00000100,
679         0xc380, 0xffffffff, 0x00000100,
680         0x8b28, 0xffffffff, 0x00000100,
681         0x9144, 0xffffffff, 0x00000100,
682         0x8d88, 0xffffffff, 0x00000100,
683         0x8d8c, 0xffffffff, 0x00000100,
684         0x9030, 0xffffffff, 0x00000100,
685         0x9034, 0xffffffff, 0x00000100,
686         0x9038, 0xffffffff, 0x00000100,
687         0x903c, 0xffffffff, 0x00000100,
688         0xad80, 0xffffffff, 0x00000100,
689         0xac54, 0xffffffff, 0x00000100,
690         0x897c, 0xffffffff, 0x06000100,
691         0x9868, 0xffffffff, 0x00000100,
692         0x9510, 0xffffffff, 0x00000100,
693         0xaf04, 0xffffffff, 0x00000100,
694         0xae04, 0xffffffff, 0x00000100,
695         0x949c, 0xffffffff, 0x00000100,
696         0x802c, 0xffffffff, 0xe0000000,
697         0x9160, 0xffffffff, 0x00010000,
698         0x9164, 0xffffffff, 0x00030002,
699         0x9168, 0xffffffff, 0x00040007,
700         0x916c, 0xffffffff, 0x00060005,
701         0x9170, 0xffffffff, 0x00090008,
702         0x9174, 0xffffffff, 0x00020001,
703         0x9178, 0xffffffff, 0x00040003,
704         0x917c, 0xffffffff, 0x00000007,
705         0x9180, 0xffffffff, 0x00060005,
706         0x9184, 0xffffffff, 0x00090008,
707         0x9188, 0xffffffff, 0x00030002,
708         0x918c, 0xffffffff, 0x00050004,
709         0x9190, 0xffffffff, 0x00000008,
710         0x9194, 0xffffffff, 0x00070006,
711         0x9198, 0xffffffff, 0x000a0009,
712         0x919c, 0xffffffff, 0x00040003,
713         0x91a0, 0xffffffff, 0x00060005,
714         0x91a4, 0xffffffff, 0x00000009,
715         0x91a8, 0xffffffff, 0x00080007,
716         0x91ac, 0xffffffff, 0x000b000a,
717         0x91b0, 0xffffffff, 0x00050004,
718         0x91b4, 0xffffffff, 0x00070006,
719         0x91b8, 0xffffffff, 0x0008000b,
720         0x91bc, 0xffffffff, 0x000a0009,
721         0x91c0, 0xffffffff, 0x000d000c,
722         0x9200, 0xffffffff, 0x00090008,
723         0x9204, 0xffffffff, 0x000b000a,
724         0x9208, 0xffffffff, 0x000c000f,
725         0x920c, 0xffffffff, 0x000e000d,
726         0x9210, 0xffffffff, 0x00110010,
727         0x9214, 0xffffffff, 0x000a0009,
728         0x9218, 0xffffffff, 0x000c000b,
729         0x921c, 0xffffffff, 0x0000000f,
730         0x9220, 0xffffffff, 0x000e000d,
731         0x9224, 0xffffffff, 0x00110010,
732         0x9228, 0xffffffff, 0x000b000a,
733         0x922c, 0xffffffff, 0x000d000c,
734         0x9230, 0xffffffff, 0x00000010,
735         0x9234, 0xffffffff, 0x000f000e,
736         0x9238, 0xffffffff, 0x00120011,
737         0x923c, 0xffffffff, 0x000c000b,
738         0x9240, 0xffffffff, 0x000e000d,
739         0x9244, 0xffffffff, 0x00000011,
740         0x9248, 0xffffffff, 0x0010000f,
741         0x924c, 0xffffffff, 0x00130012,
742         0x9250, 0xffffffff, 0x000d000c,
743         0x9254, 0xffffffff, 0x000f000e,
744         0x9258, 0xffffffff, 0x00100013,
745         0x925c, 0xffffffff, 0x00120011,
746         0x9260, 0xffffffff, 0x00150014,
747         0x9150, 0xffffffff, 0x96940200,
748         0x8708, 0xffffffff, 0x00900100,
749         0xc478, 0xffffffff, 0x00000080,
750         0xc404, 0xffffffff, 0x0020003f,
751         0x30, 0xffffffff, 0x0000001c,
752         0x34, 0x000f0000, 0x000f0000,
753         0x160c, 0xffffffff, 0x00000100,
754         0x1024, 0xffffffff, 0x00000100,
755         0x102c, 0x00000101, 0x00000000,
756         0x20a8, 0xffffffff, 0x00000104,
757         0x55e4, 0xff000fff, 0x00000100,
758         0x55e8, 0x00000001, 0x00000001,
759         0x2f50, 0x00000001, 0x00000001,
760         0x30cc, 0xc0000fff, 0x00000104,
761         0xc1e4, 0x00000001, 0x00000001,
762         0xd0c0, 0xfffffff0, 0x00000100,
763         0xd8c0, 0xfffffff0, 0x00000100
764 };
765
766 static const u32 verde_mgcg_cgcg_init[] =
767 {
768         0xc400, 0xffffffff, 0xfffffffc,
769         0x802c, 0xffffffff, 0xe0000000,
770         0x9a60, 0xffffffff, 0x00000100,
771         0x92a4, 0xffffffff, 0x00000100,
772         0xc164, 0xffffffff, 0x00000100,
773         0x9774, 0xffffffff, 0x00000100,
774         0x8984, 0xffffffff, 0x06000100,
775         0x8a18, 0xffffffff, 0x00000100,
776         0x92a0, 0xffffffff, 0x00000100,
777         0xc380, 0xffffffff, 0x00000100,
778         0x8b28, 0xffffffff, 0x00000100,
779         0x9144, 0xffffffff, 0x00000100,
780         0x8d88, 0xffffffff, 0x00000100,
781         0x8d8c, 0xffffffff, 0x00000100,
782         0x9030, 0xffffffff, 0x00000100,
783         0x9034, 0xffffffff, 0x00000100,
784         0x9038, 0xffffffff, 0x00000100,
785         0x903c, 0xffffffff, 0x00000100,
786         0xad80, 0xffffffff, 0x00000100,
787         0xac54, 0xffffffff, 0x00000100,
788         0x897c, 0xffffffff, 0x06000100,
789         0x9868, 0xffffffff, 0x00000100,
790         0x9510, 0xffffffff, 0x00000100,
791         0xaf04, 0xffffffff, 0x00000100,
792         0xae04, 0xffffffff, 0x00000100,
793         0x949c, 0xffffffff, 0x00000100,
794         0x802c, 0xffffffff, 0xe0000000,
795         0x9160, 0xffffffff, 0x00010000,
796         0x9164, 0xffffffff, 0x00030002,
797         0x9168, 0xffffffff, 0x00040007,
798         0x916c, 0xffffffff, 0x00060005,
799         0x9170, 0xffffffff, 0x00090008,
800         0x9174, 0xffffffff, 0x00020001,
801         0x9178, 0xffffffff, 0x00040003,
802         0x917c, 0xffffffff, 0x00000007,
803         0x9180, 0xffffffff, 0x00060005,
804         0x9184, 0xffffffff, 0x00090008,
805         0x9188, 0xffffffff, 0x00030002,
806         0x918c, 0xffffffff, 0x00050004,
807         0x9190, 0xffffffff, 0x00000008,
808         0x9194, 0xffffffff, 0x00070006,
809         0x9198, 0xffffffff, 0x000a0009,
810         0x919c, 0xffffffff, 0x00040003,
811         0x91a0, 0xffffffff, 0x00060005,
812         0x91a4, 0xffffffff, 0x00000009,
813         0x91a8, 0xffffffff, 0x00080007,
814         0x91ac, 0xffffffff, 0x000b000a,
815         0x91b0, 0xffffffff, 0x00050004,
816         0x91b4, 0xffffffff, 0x00070006,
817         0x91b8, 0xffffffff, 0x0008000b,
818         0x91bc, 0xffffffff, 0x000a0009,
819         0x91c0, 0xffffffff, 0x000d000c,
820         0x9200, 0xffffffff, 0x00090008,
821         0x9204, 0xffffffff, 0x000b000a,
822         0x9208, 0xffffffff, 0x000c000f,
823         0x920c, 0xffffffff, 0x000e000d,
824         0x9210, 0xffffffff, 0x00110010,
825         0x9214, 0xffffffff, 0x000a0009,
826         0x9218, 0xffffffff, 0x000c000b,
827         0x921c, 0xffffffff, 0x0000000f,
828         0x9220, 0xffffffff, 0x000e000d,
829         0x9224, 0xffffffff, 0x00110010,
830         0x9228, 0xffffffff, 0x000b000a,
831         0x922c, 0xffffffff, 0x000d000c,
832         0x9230, 0xffffffff, 0x00000010,
833         0x9234, 0xffffffff, 0x000f000e,
834         0x9238, 0xffffffff, 0x00120011,
835         0x923c, 0xffffffff, 0x000c000b,
836         0x9240, 0xffffffff, 0x000e000d,
837         0x9244, 0xffffffff, 0x00000011,
838         0x9248, 0xffffffff, 0x0010000f,
839         0x924c, 0xffffffff, 0x00130012,
840         0x9250, 0xffffffff, 0x000d000c,
841         0x9254, 0xffffffff, 0x000f000e,
842         0x9258, 0xffffffff, 0x00100013,
843         0x925c, 0xffffffff, 0x00120011,
844         0x9260, 0xffffffff, 0x00150014,
845         0x9150, 0xffffffff, 0x96940200,
846         0x8708, 0xffffffff, 0x00900100,
847         0xc478, 0xffffffff, 0x00000080,
848         0xc404, 0xffffffff, 0x0020003f,
849         0x30, 0xffffffff, 0x0000001c,
850         0x34, 0x000f0000, 0x000f0000,
851         0x160c, 0xffffffff, 0x00000100,
852         0x1024, 0xffffffff, 0x00000100,
853         0x102c, 0x00000101, 0x00000000,
854         0x20a8, 0xffffffff, 0x00000104,
855         0x264c, 0x000c0000, 0x000c0000,
856         0x2648, 0x000c0000, 0x000c0000,
857         0x55e4, 0xff000fff, 0x00000100,
858         0x55e8, 0x00000001, 0x00000001,
859         0x2f50, 0x00000001, 0x00000001,
860         0x30cc, 0xc0000fff, 0x00000104,
861         0xc1e4, 0x00000001, 0x00000001,
862         0xd0c0, 0xfffffff0, 0x00000100,
863         0xd8c0, 0xfffffff0, 0x00000100
864 };
865
866 static const u32 oland_mgcg_cgcg_init[] =
867 {
868         0xc400, 0xffffffff, 0xfffffffc,
869         0x802c, 0xffffffff, 0xe0000000,
870         0x9a60, 0xffffffff, 0x00000100,
871         0x92a4, 0xffffffff, 0x00000100,
872         0xc164, 0xffffffff, 0x00000100,
873         0x9774, 0xffffffff, 0x00000100,
874         0x8984, 0xffffffff, 0x06000100,
875         0x8a18, 0xffffffff, 0x00000100,
876         0x92a0, 0xffffffff, 0x00000100,
877         0xc380, 0xffffffff, 0x00000100,
878         0x8b28, 0xffffffff, 0x00000100,
879         0x9144, 0xffffffff, 0x00000100,
880         0x8d88, 0xffffffff, 0x00000100,
881         0x8d8c, 0xffffffff, 0x00000100,
882         0x9030, 0xffffffff, 0x00000100,
883         0x9034, 0xffffffff, 0x00000100,
884         0x9038, 0xffffffff, 0x00000100,
885         0x903c, 0xffffffff, 0x00000100,
886         0xad80, 0xffffffff, 0x00000100,
887         0xac54, 0xffffffff, 0x00000100,
888         0x897c, 0xffffffff, 0x06000100,
889         0x9868, 0xffffffff, 0x00000100,
890         0x9510, 0xffffffff, 0x00000100,
891         0xaf04, 0xffffffff, 0x00000100,
892         0xae04, 0xffffffff, 0x00000100,
893         0x949c, 0xffffffff, 0x00000100,
894         0x802c, 0xffffffff, 0xe0000000,
895         0x9160, 0xffffffff, 0x00010000,
896         0x9164, 0xffffffff, 0x00030002,
897         0x9168, 0xffffffff, 0x00040007,
898         0x916c, 0xffffffff, 0x00060005,
899         0x9170, 0xffffffff, 0x00090008,
900         0x9174, 0xffffffff, 0x00020001,
901         0x9178, 0xffffffff, 0x00040003,
902         0x917c, 0xffffffff, 0x00000007,
903         0x9180, 0xffffffff, 0x00060005,
904         0x9184, 0xffffffff, 0x00090008,
905         0x9188, 0xffffffff, 0x00030002,
906         0x918c, 0xffffffff, 0x00050004,
907         0x9190, 0xffffffff, 0x00000008,
908         0x9194, 0xffffffff, 0x00070006,
909         0x9198, 0xffffffff, 0x000a0009,
910         0x919c, 0xffffffff, 0x00040003,
911         0x91a0, 0xffffffff, 0x00060005,
912         0x91a4, 0xffffffff, 0x00000009,
913         0x91a8, 0xffffffff, 0x00080007,
914         0x91ac, 0xffffffff, 0x000b000a,
915         0x91b0, 0xffffffff, 0x00050004,
916         0x91b4, 0xffffffff, 0x00070006,
917         0x91b8, 0xffffffff, 0x0008000b,
918         0x91bc, 0xffffffff, 0x000a0009,
919         0x91c0, 0xffffffff, 0x000d000c,
920         0x91c4, 0xffffffff, 0x00060005,
921         0x91c8, 0xffffffff, 0x00080007,
922         0x91cc, 0xffffffff, 0x0000000b,
923         0x91d0, 0xffffffff, 0x000a0009,
924         0x91d4, 0xffffffff, 0x000d000c,
925         0x9150, 0xffffffff, 0x96940200,
926         0x8708, 0xffffffff, 0x00900100,
927         0xc478, 0xffffffff, 0x00000080,
928         0xc404, 0xffffffff, 0x0020003f,
929         0x30, 0xffffffff, 0x0000001c,
930         0x34, 0x000f0000, 0x000f0000,
931         0x160c, 0xffffffff, 0x00000100,
932         0x1024, 0xffffffff, 0x00000100,
933         0x102c, 0x00000101, 0x00000000,
934         0x20a8, 0xffffffff, 0x00000104,
935         0x264c, 0x000c0000, 0x000c0000,
936         0x2648, 0x000c0000, 0x000c0000,
937         0x55e4, 0xff000fff, 0x00000100,
938         0x55e8, 0x00000001, 0x00000001,
939         0x2f50, 0x00000001, 0x00000001,
940         0x30cc, 0xc0000fff, 0x00000104,
941         0xc1e4, 0x00000001, 0x00000001,
942         0xd0c0, 0xfffffff0, 0x00000100,
943         0xd8c0, 0xfffffff0, 0x00000100
944 };
945
946 static const u32 hainan_mgcg_cgcg_init[] =
947 {
948         0xc400, 0xffffffff, 0xfffffffc,
949         0x802c, 0xffffffff, 0xe0000000,
950         0x9a60, 0xffffffff, 0x00000100,
951         0x92a4, 0xffffffff, 0x00000100,
952         0xc164, 0xffffffff, 0x00000100,
953         0x9774, 0xffffffff, 0x00000100,
954         0x8984, 0xffffffff, 0x06000100,
955         0x8a18, 0xffffffff, 0x00000100,
956         0x92a0, 0xffffffff, 0x00000100,
957         0xc380, 0xffffffff, 0x00000100,
958         0x8b28, 0xffffffff, 0x00000100,
959         0x9144, 0xffffffff, 0x00000100,
960         0x8d88, 0xffffffff, 0x00000100,
961         0x8d8c, 0xffffffff, 0x00000100,
962         0x9030, 0xffffffff, 0x00000100,
963         0x9034, 0xffffffff, 0x00000100,
964         0x9038, 0xffffffff, 0x00000100,
965         0x903c, 0xffffffff, 0x00000100,
966         0xad80, 0xffffffff, 0x00000100,
967         0xac54, 0xffffffff, 0x00000100,
968         0x897c, 0xffffffff, 0x06000100,
969         0x9868, 0xffffffff, 0x00000100,
970         0x9510, 0xffffffff, 0x00000100,
971         0xaf04, 0xffffffff, 0x00000100,
972         0xae04, 0xffffffff, 0x00000100,
973         0x949c, 0xffffffff, 0x00000100,
974         0x802c, 0xffffffff, 0xe0000000,
975         0x9160, 0xffffffff, 0x00010000,
976         0x9164, 0xffffffff, 0x00030002,
977         0x9168, 0xffffffff, 0x00040007,
978         0x916c, 0xffffffff, 0x00060005,
979         0x9170, 0xffffffff, 0x00090008,
980         0x9174, 0xffffffff, 0x00020001,
981         0x9178, 0xffffffff, 0x00040003,
982         0x917c, 0xffffffff, 0x00000007,
983         0x9180, 0xffffffff, 0x00060005,
984         0x9184, 0xffffffff, 0x00090008,
985         0x9188, 0xffffffff, 0x00030002,
986         0x918c, 0xffffffff, 0x00050004,
987         0x9190, 0xffffffff, 0x00000008,
988         0x9194, 0xffffffff, 0x00070006,
989         0x9198, 0xffffffff, 0x000a0009,
990         0x919c, 0xffffffff, 0x00040003,
991         0x91a0, 0xffffffff, 0x00060005,
992         0x91a4, 0xffffffff, 0x00000009,
993         0x91a8, 0xffffffff, 0x00080007,
994         0x91ac, 0xffffffff, 0x000b000a,
995         0x91b0, 0xffffffff, 0x00050004,
996         0x91b4, 0xffffffff, 0x00070006,
997         0x91b8, 0xffffffff, 0x0008000b,
998         0x91bc, 0xffffffff, 0x000a0009,
999         0x91c0, 0xffffffff, 0x000d000c,
1000         0x91c4, 0xffffffff, 0x00060005,
1001         0x91c8, 0xffffffff, 0x00080007,
1002         0x91cc, 0xffffffff, 0x0000000b,
1003         0x91d0, 0xffffffff, 0x000a0009,
1004         0x91d4, 0xffffffff, 0x000d000c,
1005         0x9150, 0xffffffff, 0x96940200,
1006         0x8708, 0xffffffff, 0x00900100,
1007         0xc478, 0xffffffff, 0x00000080,
1008         0xc404, 0xffffffff, 0x0020003f,
1009         0x30, 0xffffffff, 0x0000001c,
1010         0x34, 0x000f0000, 0x000f0000,
1011         0x160c, 0xffffffff, 0x00000100,
1012         0x1024, 0xffffffff, 0x00000100,
1013         0x20a8, 0xffffffff, 0x00000104,
1014         0x264c, 0x000c0000, 0x000c0000,
1015         0x2648, 0x000c0000, 0x000c0000,
1016         0x2f50, 0x00000001, 0x00000001,
1017         0x30cc, 0xc0000fff, 0x00000104,
1018         0xc1e4, 0x00000001, 0x00000001,
1019         0xd0c0, 0xfffffff0, 0x00000100,
1020         0xd8c0, 0xfffffff0, 0x00000100
1021 };
1022
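/*
 * RLC power-gating init sequence for Verde, in the same triple layout;
 * applied from si_init_golden_registers() for CHIP_VERDE only.
 */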
1023 static const u32 verde_pg_init[] =
1024 {
1025         0x353c, 0xffffffff, 0x40000,
1026         0x3538, 0xffffffff, 0x200010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x7007,
1033         0x3538, 0xffffffff, 0x300010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x400000,
1040         0x3538, 0xffffffff, 0x100010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x120200,
1047         0x3538, 0xffffffff, 0x500010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x1e1e16,
1054         0x3538, 0xffffffff, 0x600010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x171f1e,
1061         0x3538, 0xffffffff, 0x700010ff,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x353c, 0xffffffff, 0x0,
1068         0x3538, 0xffffffff, 0x9ff,
1069         0x3500, 0xffffffff, 0x0,
1070         0x3504, 0xffffffff, 0x10000800,
1071         0x3504, 0xffffffff, 0xf,
1072         0x3504, 0xffffffff, 0xf,
1073         0x3500, 0xffffffff, 0x4,
1074         0x3504, 0xffffffff, 0x1000051e,
1075         0x3504, 0xffffffff, 0xffff,
1076         0x3504, 0xffffffff, 0xffff,
1077         0x3500, 0xffffffff, 0x8,
1078         0x3504, 0xffffffff, 0x80500,
1079         0x3500, 0xffffffff, 0x12,
1080         0x3504, 0xffffffff, 0x9050c,
1081         0x3500, 0xffffffff, 0x1d,
1082         0x3504, 0xffffffff, 0xb052c,
1083         0x3500, 0xffffffff, 0x2a,
1084         0x3504, 0xffffffff, 0x1053e,
1085         0x3500, 0xffffffff, 0x2d,
1086         0x3504, 0xffffffff, 0x10546,
1087         0x3500, 0xffffffff, 0x30,
1088         0x3504, 0xffffffff, 0xa054e,
1089         0x3500, 0xffffffff, 0x3c,
1090         0x3504, 0xffffffff, 0x1055f,
1091         0x3500, 0xffffffff, 0x3f,
1092         0x3504, 0xffffffff, 0x10567,
1093         0x3500, 0xffffffff, 0x42,
1094         0x3504, 0xffffffff, 0x1056f,
1095         0x3500, 0xffffffff, 0x45,
1096         0x3504, 0xffffffff, 0x10572,
1097         0x3500, 0xffffffff, 0x48,
1098         0x3504, 0xffffffff, 0x20575,
1099         0x3500, 0xffffffff, 0x4c,
1100         0x3504, 0xffffffff, 0x190801,
1101         0x3500, 0xffffffff, 0x67,
1102         0x3504, 0xffffffff, 0x1082a,
1103         0x3500, 0xffffffff, 0x6a,
1104         0x3504, 0xffffffff, 0x1b082d,
1105         0x3500, 0xffffffff, 0x87,
1106         0x3504, 0xffffffff, 0x310851,
1107         0x3500, 0xffffffff, 0xba,
1108         0x3504, 0xffffffff, 0x891,
1109         0x3500, 0xffffffff, 0xbc,
1110         0x3504, 0xffffffff, 0x893,
1111         0x3500, 0xffffffff, 0xbe,
1112         0x3504, 0xffffffff, 0x20895,
1113         0x3500, 0xffffffff, 0xc2,
1114         0x3504, 0xffffffff, 0x20899,
1115         0x3500, 0xffffffff, 0xc6,
1116         0x3504, 0xffffffff, 0x2089d,
1117         0x3500, 0xffffffff, 0xca,
1118         0x3504, 0xffffffff, 0x8a1,
1119         0x3500, 0xffffffff, 0xcc,
1120         0x3504, 0xffffffff, 0x8a3,
1121         0x3500, 0xffffffff, 0xce,
1122         0x3504, 0xffffffff, 0x308a5,
1123         0x3500, 0xffffffff, 0xd3,
1124         0x3504, 0xffffffff, 0x6d08cd,
1125         0x3500, 0xffffffff, 0x142,
1126         0x3504, 0xffffffff, 0x2000095a,
1127         0x3504, 0xffffffff, 0x1,
1128         0x3500, 0xffffffff, 0x144,
1129         0x3504, 0xffffffff, 0x301f095b,
1130         0x3500, 0xffffffff, 0x165,
1131         0x3504, 0xffffffff, 0xc094d,
1132         0x3500, 0xffffffff, 0x173,
1133         0x3504, 0xffffffff, 0xf096d,
1134         0x3500, 0xffffffff, 0x184,
1135         0x3504, 0xffffffff, 0x15097f,
1136         0x3500, 0xffffffff, 0x19b,
1137         0x3504, 0xffffffff, 0xc0998,
1138         0x3500, 0xffffffff, 0x1a9,
1139         0x3504, 0xffffffff, 0x409a7,
1140         0x3500, 0xffffffff, 0x1af,
1141         0x3504, 0xffffffff, 0xcdc,
1142         0x3500, 0xffffffff, 0x1b1,
1143         0x3504, 0xffffffff, 0x800,
1144         0x3508, 0xffffffff, 0x6c9b2000,
1145         0x3510, 0xfc00, 0x2000,
1146         0x3544, 0xffffffff, 0xfc0,
1147         0x28d4, 0x00000100, 0x100
1148 };
1149
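/**
 * si_init_golden_registers - program golden register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the family-specific golden register, RLC, clock gating and
 * (on Verde) power gating init sequences (SI).
 */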
1150 static void si_init_golden_registers(struct radeon_device *rdev)
1151 {
1152         switch (rdev->family) {
1153         case CHIP_TAHITI:
1154                 radeon_program_register_sequence(rdev,
1155                                                  tahiti_golden_registers,
1156                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1157                 radeon_program_register_sequence(rdev,
1158                                                  tahiti_golden_rlc_registers,
1159                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1160                 radeon_program_register_sequence(rdev,
1161                                                  tahiti_mgcg_cgcg_init,
1162                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1163                 radeon_program_register_sequence(rdev,
1164                                                  tahiti_golden_registers2,
1165                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1166                 break;
1167         case CHIP_PITCAIRN:
1168                 radeon_program_register_sequence(rdev,
1169                                                  pitcairn_golden_registers,
1170                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1171                 radeon_program_register_sequence(rdev,
1172                                                  pitcairn_golden_rlc_registers,
1173                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1174                 radeon_program_register_sequence(rdev,
1175                                                  pitcairn_mgcg_cgcg_init,
1176                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1177                 break;
1178         case CHIP_VERDE:
1179                 radeon_program_register_sequence(rdev,
1180                                                  verde_golden_registers,
1181                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1182                 radeon_program_register_sequence(rdev,
1183                                                  verde_golden_rlc_registers,
1184                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1185                 radeon_program_register_sequence(rdev,
1186                                                  verde_mgcg_cgcg_init,
1187                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1188                 radeon_program_register_sequence(rdev,
1189                                                  verde_pg_init,
1190                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1191                 break;
1192         case CHIP_OLAND:
1193                 radeon_program_register_sequence(rdev,
1194                                                  oland_golden_registers,
1195                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1196                 radeon_program_register_sequence(rdev,
1197                                                  oland_golden_rlc_registers,
1198                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1199                 radeon_program_register_sequence(rdev,
1200                                                  oland_mgcg_cgcg_init,
1201                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1202                 break;
1203         case CHIP_HAINAN:
1204                 radeon_program_register_sequence(rdev,
1205                                                  hainan_golden_registers,
1206                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1207                 radeon_program_register_sequence(rdev,
1208                                                  hainan_golden_registers2,
1209                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1210                 radeon_program_register_sequence(rdev,
1211                                                  hainan_mgcg_cgcg_init,
1212                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1213                 break;
1214         default:
1215                 break;
1216         }
1217 }
1218
1219 #define PCIE_BUS_CLK                10000
1220 #define TCLK                        (PCIE_BUS_CLK / 10)
1221
1222 /**
1223  * si_get_xclk - get the xclk
1224  *
1225  * @rdev: radeon_device pointer
1226  *
1227  * Returns the reference clock used by the gfx engine
1228  * (SI).
1229  */
1230 u32 si_get_xclk(struct radeon_device *rdev)
1231 {
1232         u32 reference_clock = rdev->clock.spll.reference_freq;
1233         u32 tmp;
1234
1235         tmp = RREG32(CG_CLKPIN_CNTL_2);
1236         if (tmp & MUX_TCLK_TO_XCLK)
1237                 return TCLK;
1238
1239         tmp = RREG32(CG_CLKPIN_CNTL);
1240         if (tmp & XTALIN_DIVIDE)
1241                 return reference_clock / 4;
1242
1243         return reference_clock;
1244 }
1245
1246 /* get temperature in millidegrees */
1247 int si_get_temp(struct radeon_device *rdev)
1248 {
1249         u32 temp;
1250         int actual_temp = 0;
1251
1252         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1253                 CTF_TEMP_SHIFT;
1254
1255         if (temp & 0x200)
1256                 actual_temp = 255;
1257         else
1258                 actual_temp = temp & 0x1ff;
1259
1260         actual_temp = (actual_temp * 1000);
1261
1262         return actual_temp;
1263 }
1264
1265 #define TAHITI_IO_MC_REGS_SIZE 36
1266
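/*
 * Per-ASIC MC IO register {index, data} pairs, written through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by si_mc_load_microcode()
 * before the MC microcode itself is uploaded.
 */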
1267 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1268         {0x0000006f, 0x03044000},
1269         {0x00000070, 0x0480c018},
1270         {0x00000071, 0x00000040},
1271         {0x00000072, 0x01000000},
1272         {0x00000074, 0x000000ff},
1273         {0x00000075, 0x00143400},
1274         {0x00000076, 0x08ec0800},
1275         {0x00000077, 0x040000cc},
1276         {0x00000079, 0x00000000},
1277         {0x0000007a, 0x21000409},
1278         {0x0000007c, 0x00000000},
1279         {0x0000007d, 0xe8000000},
1280         {0x0000007e, 0x044408a8},
1281         {0x0000007f, 0x00000003},
1282         {0x00000080, 0x00000000},
1283         {0x00000081, 0x01000000},
1284         {0x00000082, 0x02000000},
1285         {0x00000083, 0x00000000},
1286         {0x00000084, 0xe3f3e4f4},
1287         {0x00000085, 0x00052024},
1288         {0x00000087, 0x00000000},
1289         {0x00000088, 0x66036603},
1290         {0x00000089, 0x01000000},
1291         {0x0000008b, 0x1c0a0000},
1292         {0x0000008c, 0xff010000},
1293         {0x0000008e, 0xffffefff},
1294         {0x0000008f, 0xfff3efff},
1295         {0x00000090, 0xfff3efbf},
1296         {0x00000094, 0x00101101},
1297         {0x00000095, 0x00000fff},
1298         {0x00000096, 0x00116fff},
1299         {0x00000097, 0x60010000},
1300         {0x00000098, 0x10010000},
1301         {0x00000099, 0x00006000},
1302         {0x0000009a, 0x00001000},
1303         {0x0000009f, 0x00a77400}
1304 };
1305
1306 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1307         {0x0000006f, 0x03044000},
1308         {0x00000070, 0x0480c018},
1309         {0x00000071, 0x00000040},
1310         {0x00000072, 0x01000000},
1311         {0x00000074, 0x000000ff},
1312         {0x00000075, 0x00143400},
1313         {0x00000076, 0x08ec0800},
1314         {0x00000077, 0x040000cc},
1315         {0x00000079, 0x00000000},
1316         {0x0000007a, 0x21000409},
1317         {0x0000007c, 0x00000000},
1318         {0x0000007d, 0xe8000000},
1319         {0x0000007e, 0x044408a8},
1320         {0x0000007f, 0x00000003},
1321         {0x00000080, 0x00000000},
1322         {0x00000081, 0x01000000},
1323         {0x00000082, 0x02000000},
1324         {0x00000083, 0x00000000},
1325         {0x00000084, 0xe3f3e4f4},
1326         {0x00000085, 0x00052024},
1327         {0x00000087, 0x00000000},
1328         {0x00000088, 0x66036603},
1329         {0x00000089, 0x01000000},
1330         {0x0000008b, 0x1c0a0000},
1331         {0x0000008c, 0xff010000},
1332         {0x0000008e, 0xffffefff},
1333         {0x0000008f, 0xfff3efff},
1334         {0x00000090, 0xfff3efbf},
1335         {0x00000094, 0x00101101},
1336         {0x00000095, 0x00000fff},
1337         {0x00000096, 0x00116fff},
1338         {0x00000097, 0x60010000},
1339         {0x00000098, 0x10010000},
1340         {0x00000099, 0x00006000},
1341         {0x0000009a, 0x00001000},
1342         {0x0000009f, 0x00a47400}
1343 };
1344
1345 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1346         {0x0000006f, 0x03044000},
1347         {0x00000070, 0x0480c018},
1348         {0x00000071, 0x00000040},
1349         {0x00000072, 0x01000000},
1350         {0x00000074, 0x000000ff},
1351         {0x00000075, 0x00143400},
1352         {0x00000076, 0x08ec0800},
1353         {0x00000077, 0x040000cc},
1354         {0x00000079, 0x00000000},
1355         {0x0000007a, 0x21000409},
1356         {0x0000007c, 0x00000000},
1357         {0x0000007d, 0xe8000000},
1358         {0x0000007e, 0x044408a8},
1359         {0x0000007f, 0x00000003},
1360         {0x00000080, 0x00000000},
1361         {0x00000081, 0x01000000},
1362         {0x00000082, 0x02000000},
1363         {0x00000083, 0x00000000},
1364         {0x00000084, 0xe3f3e4f4},
1365         {0x00000085, 0x00052024},
1366         {0x00000087, 0x00000000},
1367         {0x00000088, 0x66036603},
1368         {0x00000089, 0x01000000},
1369         {0x0000008b, 0x1c0a0000},
1370         {0x0000008c, 0xff010000},
1371         {0x0000008e, 0xffffefff},
1372         {0x0000008f, 0xfff3efff},
1373         {0x00000090, 0xfff3efbf},
1374         {0x00000094, 0x00101101},
1375         {0x00000095, 0x00000fff},
1376         {0x00000096, 0x00116fff},
1377         {0x00000097, 0x60010000},
1378         {0x00000098, 0x10010000},
1379         {0x00000099, 0x00006000},
1380         {0x0000009a, 0x00001000},
1381         {0x0000009f, 0x00a37400}
1382 };
1383
1384 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1385         {0x0000006f, 0x03044000},
1386         {0x00000070, 0x0480c018},
1387         {0x00000071, 0x00000040},
1388         {0x00000072, 0x01000000},
1389         {0x00000074, 0x000000ff},
1390         {0x00000075, 0x00143400},
1391         {0x00000076, 0x08ec0800},
1392         {0x00000077, 0x040000cc},
1393         {0x00000079, 0x00000000},
1394         {0x0000007a, 0x21000409},
1395         {0x0000007c, 0x00000000},
1396         {0x0000007d, 0xe8000000},
1397         {0x0000007e, 0x044408a8},
1398         {0x0000007f, 0x00000003},
1399         {0x00000080, 0x00000000},
1400         {0x00000081, 0x01000000},
1401         {0x00000082, 0x02000000},
1402         {0x00000083, 0x00000000},
1403         {0x00000084, 0xe3f3e4f4},
1404         {0x00000085, 0x00052024},
1405         {0x00000087, 0x00000000},
1406         {0x00000088, 0x66036603},
1407         {0x00000089, 0x01000000},
1408         {0x0000008b, 0x1c0a0000},
1409         {0x0000008c, 0xff010000},
1410         {0x0000008e, 0xffffefff},
1411         {0x0000008f, 0xfff3efff},
1412         {0x00000090, 0xfff3efbf},
1413         {0x00000094, 0x00101101},
1414         {0x00000095, 0x00000fff},
1415         {0x00000096, 0x00116fff},
1416         {0x00000097, 0x60010000},
1417         {0x00000098, 0x10010000},
1418         {0x00000099, 0x00006000},
1419         {0x0000009a, 0x00001000},
1420         {0x0000009f, 0x00a17730}
1421 };
1422
1423 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1424         {0x0000006f, 0x03044000},
1425         {0x00000070, 0x0480c018},
1426         {0x00000071, 0x00000040},
1427         {0x00000072, 0x01000000},
1428         {0x00000074, 0x000000ff},
1429         {0x00000075, 0x00143400},
1430         {0x00000076, 0x08ec0800},
1431         {0x00000077, 0x040000cc},
1432         {0x00000079, 0x00000000},
1433         {0x0000007a, 0x21000409},
1434         {0x0000007c, 0x00000000},
1435         {0x0000007d, 0xe8000000},
1436         {0x0000007e, 0x044408a8},
1437         {0x0000007f, 0x00000003},
1438         {0x00000080, 0x00000000},
1439         {0x00000081, 0x01000000},
1440         {0x00000082, 0x02000000},
1441         {0x00000083, 0x00000000},
1442         {0x00000084, 0xe3f3e4f4},
1443         {0x00000085, 0x00052024},
1444         {0x00000087, 0x00000000},
1445         {0x00000088, 0x66036603},
1446         {0x00000089, 0x01000000},
1447         {0x0000008b, 0x1c0a0000},
1448         {0x0000008c, 0xff010000},
1449         {0x0000008e, 0xffffefff},
1450         {0x0000008f, 0xfff3efff},
1451         {0x00000090, 0xfff3efbf},
1452         {0x00000094, 0x00101101},
1453         {0x00000095, 0x00000fff},
1454         {0x00000096, 0x00116fff},
1455         {0x00000097, 0x60010000},
1456         {0x00000098, 0x10010000},
1457         {0x00000099, 0x00006000},
1458         {0x0000009a, 0x00001000},
1459         {0x0000009f, 0x00a07730}
1460 };
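
     /*
      * Each of the *_io_mc_regs tables above is a list of {MC_SEQ_IO_DEBUG
      * index, value} pairs.  si_mc_load_microcode() below walks the table
      * selected for the chip, writing each index to MC_SEQ_IO_DEBUG_INDEX and
      * each value to MC_SEQ_IO_DEBUG_DATA, before streaming the MC ucode
      * itself.  All of the tables have TAHITI_IO_MC_REGS_SIZE entries and
      * differ only in a few per-chip values (e.g. the 0x9f entry).
      */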
1461
1462 /* ucode loading */
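     /*
      * si_mc_load_microcode() is called from si_startup() and only does
      * anything when the MC sequencer is not already running (running == 0):
      * it resets the sequencer and makes it writable, replays the per-chip
      * io_mc_regs table through the MC_SEQ_IO_DEBUG interface, streams the
      * big-endian MC ucode words into MC_SEQ_SUP_PGM, re-arms the sequencer
      * and then waits (bounded by usec_timeout) for the D0 and D1
      * training-done bits in MC_SEQ_TRAIN_WAKEUP_CNTL.
      */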
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465         const __be32 *fw_data;
1466         u32 running, blackout = 0;
1467         u32 *io_mc_regs;
1468         int i, ucode_size, regs_size;
1469
1470         if (!rdev->mc_fw)
1471                 return -EINVAL;
1472
1473         switch (rdev->family) {
1474         case CHIP_TAHITI:
1475                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476                 ucode_size = SI_MC_UCODE_SIZE;
1477                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1478                 break;
1479         case CHIP_PITCAIRN:
1480                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_VERDE:
1485         default:
1486                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1487                 ucode_size = SI_MC_UCODE_SIZE;
1488                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1489                 break;
1490         case CHIP_OLAND:
1491                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1492                 ucode_size = OLAND_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         case CHIP_HAINAN:
1496                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497                 ucode_size = OLAND_MC_UCODE_SIZE;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         }
1501
1502         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503
1504         if (running == 0) {
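                     /*
                      * running is zero in this path, so the blackout save
                      * just below (and the matching restore at the end of
                      * this block) never actually executes.
                      */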
1505                 if (running) {
1506                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1507                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1508                 }
1509
1510                 /* reset the engine and set to writable */
1511                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513
1514                 /* load mc io regs */
1515                 for (i = 0; i < regs_size; i++) {
1516                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518                 }
1519                 /* load the MC ucode */
1520                 fw_data = (const __be32 *)rdev->mc_fw->data;
1521                 for (i = 0; i < ucode_size; i++)
1522                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523
1524                 /* put the engine back into the active state */
1525                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528
1529                 /* wait for training to complete */
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532                                 break;
1533                         udelay(1);
1534                 }
1535                 for (i = 0; i < rdev->usec_timeout; i++) {
1536                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537                                 break;
1538                         udelay(1);
1539                 }
1540
1541                 if (running)
1542                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1543         }
1544
1545         return 0;
1546 }
1547
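     /*
      * si_init_microcode() requests the radeon/<CHIP>_{pfp,me,ce,rlc,mc,smc}.bin
      * blobs and checks their sizes against the per-family values below.  The
      * SMC image is optional: a failed request is logged, the error is dropped
      * (err = 0) and the driver simply runs without it, while a wrong-sized
      * SMC image is treated as fatal like the other blobs.
      */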
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550         const char *chip_name;
1551         const char *rlc_chip_name;
1552         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553         size_t smc_req_size;
1554         char fw_name[30];
1555         int err;
1556
1557         DRM_DEBUG("\n");
1558
1559         switch (rdev->family) {
1560         case CHIP_TAHITI:
1561                 chip_name = "TAHITI";
1562                 rlc_chip_name = "TAHITI";
1563                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1565                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1566                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1568                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569                 break;
1570         case CHIP_PITCAIRN:
1571                 chip_name = "PITCAIRN";
1572                 rlc_chip_name = "PITCAIRN";
1573                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1575                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1576                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1578                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579                 break;
1580         case CHIP_VERDE:
1581                 chip_name = "VERDE";
1582                 rlc_chip_name = "VERDE";
1583                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1585                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1586                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1588                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589                 break;
1590         case CHIP_OLAND:
1591                 chip_name = "OLAND";
1592                 rlc_chip_name = "OLAND";
1593                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1595                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1596                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599                 break;
1600         case CHIP_HAINAN:
1601                 chip_name = "HAINAN";
1602                 rlc_chip_name = "HAINAN";
1603                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1605                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1606                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609                 break;
1610         default: BUG();
1611         }
1612
1613         DRM_INFO("Loading %s Microcode\n", chip_name);
1614
1615         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617         if (err)
1618                 goto out;
1619         if (rdev->pfp_fw->size != pfp_req_size) {
1620                 printk(KERN_ERR
1621                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622                        rdev->pfp_fw->size, fw_name);
1623                 err = -EINVAL;
1624                 goto out;
1625         }
1626
1627         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629         if (err)
1630                 goto out;
1631         if (rdev->me_fw->size != me_req_size) {
1632                 printk(KERN_ERR
1633                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634                        rdev->me_fw->size, fw_name);
1635                 err = -EINVAL;
                     goto out;
1636         }
1637
1638         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640         if (err)
1641                 goto out;
1642         if (rdev->ce_fw->size != ce_req_size) {
1643                 printk(KERN_ERR
1644                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645                        rdev->ce_fw->size, fw_name);
1646                 err = -EINVAL;
                     goto out;
1647         }
1648
1649         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651         if (err)
1652                 goto out;
1653         if (rdev->rlc_fw->size != rlc_req_size) {
1654                 printk(KERN_ERR
1655                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656                        rdev->rlc_fw->size, fw_name);
1657                 err = -EINVAL;
                     goto out;
1658         }
1659
1660         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662         if (err)
1663                 goto out;
1664         if (rdev->mc_fw->size != mc_req_size) {
1665                 printk(KERN_ERR
1666                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667                        rdev->mc_fw->size, fw_name);
1668                 err = -EINVAL;
                     goto out;
1669         }
1670
1671         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673         if (err) {
1674                 printk(KERN_ERR
1675                        "smc: error loading firmware \"%s\"\n",
1676                        fw_name);
1677                 release_firmware(rdev->smc_fw);
1678                 rdev->smc_fw = NULL;
1679                 err = 0;
1680         } else if (rdev->smc_fw->size != smc_req_size) {
1681                 printk(KERN_ERR
1682                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1683                        rdev->smc_fw->size, fw_name);
1684                 err = -EINVAL;
1685         }
1686
1687 out:
1688         if (err) {
1689                 if (err != -EINVAL)
1690                         printk(KERN_ERR
1691                                "si_cp: Failed to load firmware \"%s\"\n",
1692                                fw_name);
1693                 release_firmware(rdev->pfp_fw);
1694                 rdev->pfp_fw = NULL;
1695                 release_firmware(rdev->me_fw);
1696                 rdev->me_fw = NULL;
1697                 release_firmware(rdev->ce_fw);
1698                 rdev->ce_fw = NULL;
1699                 release_firmware(rdev->rlc_fw);
1700                 rdev->rlc_fw = NULL;
1701                 release_firmware(rdev->mc_fw);
1702                 rdev->mc_fw = NULL;
1703                 release_firmware(rdev->smc_fw);
1704                 rdev->smc_fw = NULL;
1705         }
1706         return err;
1707 }
1708
1709 /* watermark setup */
1710 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1711                                    struct radeon_crtc *radeon_crtc,
1712                                    struct drm_display_mode *mode,
1713                                    struct drm_display_mode *other_mode)
1714 {
1715         u32 tmp, buffer_alloc, i;
1716         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1717         /*
1718          * Line Buffer Setup
1719          * There are 3 line buffers, each one shared by 2 display controllers.
1720          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1721          * the display controllers.  The partitioning is done via one of four
1722          * preset allocations specified in bits 21:20 (only two are used here):
1723          *  0 - half lb
1724          *  2 - whole lb, other crtc must be disabled
1725          */
1726         /* this can get tricky if we have two large displays on a paired group
1727          * of crtcs.  Ideally for multiple large displays we'd assign them to
1728          * non-linked crtcs for maximum line buffer allocation.
1729          */
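             /*
              * Concretely: when both crtcs in a pair are active, each one gets
              * the half-lb allocation and this function reports a usable lb
              * size of 4096 * 2; a lone active crtc in the pair gets the whole
              * lb and 8192 * 2.  A disabled crtc contributes 0.
              */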
1730         if (radeon_crtc->base.enabled && mode) {
1731                 if (other_mode) {
1732                         tmp = 0; /* 1/2 */
1733                         buffer_alloc = 1;
1734                 } else {
1735                         tmp = 2; /* whole */
1736                         buffer_alloc = 2;
1737                 }
1738         } else {
1739                 tmp = 0;
1740                 buffer_alloc = 0;
1741         }
1742
1743         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1744                DC_LB_MEMORY_CONFIG(tmp));
1745
1746         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1747                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1748         for (i = 0; i < rdev->usec_timeout; i++) {
1749                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1750                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1751                         break;
1752                 udelay(1);
1753         }
1754
1755         if (radeon_crtc->base.enabled && mode) {
1756                 switch (tmp) {
1757                 case 0:
1758                 default:
1759                         return 4096 * 2;
1760                 case 2:
1761                         return 8192 * 2;
1762                 }
1763         }
1764
1765         /* controller not enabled, so no lb used */
1766         return 0;
1767 }
1768
1769 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1770 {
1771         u32 tmp = RREG32(MC_SHARED_CHMAP);
1772
1773         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1774         case 0:
1775         default:
1776                 return 1;
1777         case 1:
1778                 return 2;
1779         case 2:
1780                 return 4;
1781         case 3:
1782                 return 8;
1783         case 4:
1784                 return 3;
1785         case 5:
1786                 return 6;
1787         case 6:
1788                 return 10;
1789         case 7:
1790                 return 12;
1791         case 8:
1792                 return 16;
1793         }
1794 }
1795
1796 struct dce6_wm_params {
1797         u32 dram_channels; /* number of dram channels */
1798         u32 yclk;          /* bandwidth per dram data pin in kHz */
1799         u32 sclk;          /* engine clock in kHz */
1800         u32 disp_clk;      /* display clock in kHz */
1801         u32 src_width;     /* viewport width */
1802         u32 active_time;   /* active display time in ns */
1803         u32 blank_time;    /* blank time in ns */
1804         bool interlaced;    /* mode is interlaced */
1805         fixed20_12 vsc;    /* vertical scale ratio */
1806         u32 num_heads;     /* number of active crtcs */
1807         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1808         u32 lb_size;       /* line buffer allocated to pipe */
1809         u32 vtaps;         /* vertical scaler taps */
1810 };
1811
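     /*
      * Rough numbers for dce6_dram_bandwidth() below (purely illustrative,
      * not taken from any particular board): with yclk = 1000000 kHz and
      * 8 dram channels the result is (1000000 / 1000) * (8 * 4) * 0.7 = 22400.
      * The absolute unit only matters relative to the other dce6_*_bandwidth()
      * helpers, which use the same scaling.
      */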
1812 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1813 {
1814         /* Calculate raw DRAM Bandwidth */
1815         fixed20_12 dram_efficiency; /* 0.7 */
1816         fixed20_12 yclk, dram_channels, bandwidth;
1817         fixed20_12 a;
1818
1819         a.full = dfixed_const(1000);
1820         yclk.full = dfixed_const(wm->yclk);
1821         yclk.full = dfixed_div(yclk, a);
1822         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1823         a.full = dfixed_const(10);
1824         dram_efficiency.full = dfixed_const(7);
1825         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1826         bandwidth.full = dfixed_mul(dram_channels, yclk);
1827         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1828
1829         return dfixed_trunc(bandwidth);
1830 }
1831
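     /*
      * dce6_dram_bandwidth_for_display() below is the same calculation with
      * the worst-case 0.3 display allocation in place of the 0.7 efficiency
      * factor; the illustrative numbers above would give 1000 * 32 * 0.3 = 9600.
      */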
1832 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1833 {
1834         /* Calculate DRAM Bandwidth and the part allocated to display. */
1835         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1836         fixed20_12 yclk, dram_channels, bandwidth;
1837         fixed20_12 a;
1838
1839         a.full = dfixed_const(1000);
1840         yclk.full = dfixed_const(wm->yclk);
1841         yclk.full = dfixed_div(yclk, a);
1842         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843         a.full = dfixed_const(10);
1844         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1845         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1846         bandwidth.full = dfixed_mul(dram_channels, yclk);
1847         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1848
1849         return dfixed_trunc(bandwidth);
1850 }
1851
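     /*
      * Illustrative numbers for dce6_data_return_bandwidth() below: with
      * sclk = 800000 kHz the result is (800000 / 1000) * 32 * 0.8 = 20480.
      */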
1852 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1853 {
1854         /* Calculate the display Data return Bandwidth */
1855         fixed20_12 return_efficiency; /* 0.8 */
1856         fixed20_12 sclk, bandwidth;
1857         fixed20_12 a;
1858
1859         a.full = dfixed_const(1000);
1860         sclk.full = dfixed_const(wm->sclk);
1861         sclk.full = dfixed_div(sclk, a);
1862         a.full = dfixed_const(10);
1863         return_efficiency.full = dfixed_const(8);
1864         return_efficiency.full = dfixed_div(return_efficiency, a);
1865         a.full = dfixed_const(32);
1866         bandwidth.full = dfixed_mul(a, sclk);
1867         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1868
1869         return dfixed_trunc(bandwidth);
1870 }
1871
1872 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1873 {
1874         return 32;
1875 }
1876
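     /*
      * dce6_dmif_request_bandwidth() below takes the smaller of
      * 16 * disp_clk / 1000 and 32 * sclk / 1000 and applies the 0.8 request
      * efficiency.  E.g. with disp_clk = 300000 kHz and sclk = 800000 kHz:
      * min(16 * 300, 32 * 800) * 0.8 = 4800 * 0.8 = 3840.
      */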
1877 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1878 {
1879         /* Calculate the DMIF Request Bandwidth */
1880         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1881         fixed20_12 disp_clk, sclk, bandwidth;
1882         fixed20_12 a, b1, b2;
1883         u32 min_bandwidth;
1884
1885         a.full = dfixed_const(1000);
1886         disp_clk.full = dfixed_const(wm->disp_clk);
1887         disp_clk.full = dfixed_div(disp_clk, a);
1888         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1889         b1.full = dfixed_mul(a, disp_clk);
1890
1891         a.full = dfixed_const(1000);
1892         sclk.full = dfixed_const(wm->sclk);
1893         sclk.full = dfixed_div(sclk, a);
1894         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1895         b2.full = dfixed_mul(a, sclk);
1896
1897         a.full = dfixed_const(10);
1898         disp_clk_request_efficiency.full = dfixed_const(8);
1899         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1900
1901         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1902
1903         a.full = dfixed_const(min_bandwidth);
1904         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1905
1906         return dfixed_trunc(bandwidth);
1907 }
1908
1909 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1910 {
1911         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1912         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1913         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1914         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1915
1916         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1917 }
1918
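     /*
      * dce6_average_bandwidth() below is src_width * bytes_per_pixel * vsc
      * divided by the line time in us.  Roughly, a 1920-wide source at
      * 4 bytes per pixel with a ~13 us total line time and vsc = 1 averages
      * about 1920 * 4 / 13 = ~590.
      */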
1919 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1920 {
1921         /* Calculate the display mode Average Bandwidth
1922          * DisplayMode should contain the source and destination dimensions,
1923          * timing, etc.
1924          */
1925         fixed20_12 bpp;
1926         fixed20_12 line_time;
1927         fixed20_12 src_width;
1928         fixed20_12 bandwidth;
1929         fixed20_12 a;
1930
1931         a.full = dfixed_const(1000);
1932         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1933         line_time.full = dfixed_div(line_time, a);
1934         bpp.full = dfixed_const(wm->bytes_per_pixel);
1935         src_width.full = dfixed_const(wm->src_width);
1936         bandwidth.full = dfixed_mul(src_width, bpp);
1937         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1938         bandwidth.full = dfixed_div(bandwidth, line_time);
1939
1940         return dfixed_trunc(bandwidth);
1941 }
1942
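     /*
      * dce6_latency_watermark() below sums the fixed MC latency, the DC pipe
      * latency and the time spent returning data for the other heads, then
      * extends that figure by however much the line buffer fill time for one
      * destination line exceeds the active display time.
      */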
1943 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1944 {
1945         /* First calculate the latency in ns */
1946         u32 mc_latency = 2000; /* 2000 ns. */
1947         u32 available_bandwidth = dce6_available_bandwidth(wm);
1948         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1949         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1950         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1951         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1952                 (wm->num_heads * cursor_line_pair_return_time);
1953         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1954         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1955         u32 tmp, dmif_size = 12288;
1956         fixed20_12 a, b, c;
1957
1958         if (wm->num_heads == 0)
1959                 return 0;
1960
1961         a.full = dfixed_const(2);
1962         b.full = dfixed_const(1);
1963         if ((wm->vsc.full > a.full) ||
1964             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1965             (wm->vtaps >= 5) ||
1966             ((wm->vsc.full >= a.full) && wm->interlaced))
1967                 max_src_lines_per_dst_line = 4;
1968         else
1969                 max_src_lines_per_dst_line = 2;
1970
1971         a.full = dfixed_const(available_bandwidth);
1972         b.full = dfixed_const(wm->num_heads);
1973         a.full = dfixed_div(a, b);
1974
1975         b.full = dfixed_const(mc_latency + 512);
1976         c.full = dfixed_const(wm->disp_clk);
1977         b.full = dfixed_div(b, c);
1978
1979         c.full = dfixed_const(dmif_size);
1980         b.full = dfixed_div(c, b);
1981
1982         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1983
1984         b.full = dfixed_const(1000);
1985         c.full = dfixed_const(wm->disp_clk);
1986         b.full = dfixed_div(c, b);
1987         c.full = dfixed_const(wm->bytes_per_pixel);
1988         b.full = dfixed_mul(b, c);
1989
1990         lb_fill_bw = min(tmp, dfixed_trunc(b));
1991
1992         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1993         b.full = dfixed_const(1000);
1994         c.full = dfixed_const(lb_fill_bw);
1995         b.full = dfixed_div(c, b);
1996         a.full = dfixed_div(a, b);
1997         line_fill_time = dfixed_trunc(a);
1998
1999         if (line_fill_time < wm->active_time)
2000                 return latency;
2001         else
2002                 return latency + (line_fill_time - wm->active_time);
2003
2004 }
2005
2006 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2007 {
2008         if (dce6_average_bandwidth(wm) <=
2009             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2010                 return true;
2011         else
2012                 return false;
2013 }
2014
2015 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2016 {
2017         if (dce6_average_bandwidth(wm) <=
2018             (dce6_available_bandwidth(wm) / wm->num_heads))
2019                 return true;
2020         else
2021                 return false;
2022 }
2023
2024 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2025 {
2026         u32 lb_partitions = wm->lb_size / wm->src_width;
2027         u32 line_time = wm->active_time + wm->blank_time;
2028         u32 latency_tolerant_lines;
2029         u32 latency_hiding;
2030         fixed20_12 a;
2031
2032         a.full = dfixed_const(1);
2033         if (wm->vsc.full > a.full)
2034                 latency_tolerant_lines = 1;
2035         else {
2036                 if (lb_partitions <= (wm->vtaps + 1))
2037                         latency_tolerant_lines = 1;
2038                 else
2039                         latency_tolerant_lines = 2;
2040         }
2041
2042         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2043
2044         if (dce6_latency_watermark(wm) <= latency_hiding)
2045                 return true;
2046         else
2047                 return false;
2048 }
2049
2050 static void dce6_program_watermarks(struct radeon_device *rdev,
2051                                          struct radeon_crtc *radeon_crtc,
2052                                          u32 lb_size, u32 num_heads)
2053 {
2054         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2055         struct dce6_wm_params wm_low, wm_high;
2056         u32 dram_channels;
2057         u32 pixel_period;
2058         u32 line_time = 0;
2059         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2060         u32 priority_a_mark = 0, priority_b_mark = 0;
2061         u32 priority_a_cnt = PRIORITY_OFF;
2062         u32 priority_b_cnt = PRIORITY_OFF;
2063         u32 tmp, arb_control3;
2064         fixed20_12 a, b, c;
2065
2066         if (radeon_crtc->base.enabled && num_heads && mode) {
2067                 pixel_period = 1000000 / (u32)mode->clock;
2068                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2069                 priority_a_cnt = 0;
2070                 priority_b_cnt = 0;
2071
2072                 if (rdev->family == CHIP_ARUBA)
2073                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2074                 else
2075                         dram_channels = si_get_number_of_dram_channels(rdev);
2076
2077                 /* watermark for high clocks */
2078                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2079                         wm_high.yclk =
2080                                 radeon_dpm_get_mclk(rdev, false) * 10;
2081                         wm_high.sclk =
2082                                 radeon_dpm_get_sclk(rdev, false) * 10;
2083                 } else {
2084                         wm_high.yclk = rdev->pm.current_mclk * 10;
2085                         wm_high.sclk = rdev->pm.current_sclk * 10;
2086                 }
2087
2088                 wm_high.disp_clk = mode->clock;
2089                 wm_high.src_width = mode->crtc_hdisplay;
2090                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2091                 wm_high.blank_time = line_time - wm_high.active_time;
2092                 wm_high.interlaced = false;
2093                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2094                         wm_high.interlaced = true;
2095                 wm_high.vsc = radeon_crtc->vsc;
2096                 wm_high.vtaps = 1;
2097                 if (radeon_crtc->rmx_type != RMX_OFF)
2098                         wm_high.vtaps = 2;
2099                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2100                 wm_high.lb_size = lb_size;
2101                 wm_high.dram_channels = dram_channels;
2102                 wm_high.num_heads = num_heads;
2103
2104                 /* watermark for low clocks */
2105                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2106                         wm_low.yclk =
2107                                 radeon_dpm_get_mclk(rdev, true) * 10;
2108                         wm_low.sclk =
2109                                 radeon_dpm_get_sclk(rdev, true) * 10;
2110                 } else {
2111                         wm_low.yclk = rdev->pm.current_mclk * 10;
2112                         wm_low.sclk = rdev->pm.current_sclk * 10;
2113                 }
2114
2115                 wm_low.disp_clk = mode->clock;
2116                 wm_low.src_width = mode->crtc_hdisplay;
2117                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2118                 wm_low.blank_time = line_time - wm_low.active_time;
2119                 wm_low.interlaced = false;
2120                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2121                         wm_low.interlaced = true;
2122                 wm_low.vsc = radeon_crtc->vsc;
2123                 wm_low.vtaps = 1;
2124                 if (radeon_crtc->rmx_type != RMX_OFF)
2125                         wm_low.vtaps = 2;
2126                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2127                 wm_low.lb_size = lb_size;
2128                 wm_low.dram_channels = dram_channels;
2129                 wm_low.num_heads = num_heads;
2130
2131                 /* set for high clocks */
2132                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2133                 /* set for low clocks */
2134                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2135
2136                 /* possibly force display priority to high */
2137                 /* should really do this at mode validation time... */
2138                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2139                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2140                     !dce6_check_latency_hiding(&wm_high) ||
2141                     (rdev->disp_priority == 2)) {
2142                         DRM_DEBUG_KMS("force priority to high\n");
2143                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2144                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2145                 }
2146                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2147                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2148                     !dce6_check_latency_hiding(&wm_low) ||
2149                     (rdev->disp_priority == 2)) {
2150                         DRM_DEBUG_KMS("force priority to high\n");
2151                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2152                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2153                 }
2154
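                     /*
                      * The priority marks computed below work out to roughly
                      * the number of pixels scanned out during the latency
                      * watermark window, scaled by the horizontal scale ratio
                      * and expressed in 16-pixel units:
                      *   mark = latency_ns * (clock_kHz / 1000) * hsc / 1000 / 16
                      * e.g. a 10000 ns watermark at a 148500 kHz pixel clock
                      * with hsc = 1 gives 10000 * 148.5 / 1000 / 16 = ~92.
                      */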
2155                 a.full = dfixed_const(1000);
2156                 b.full = dfixed_const(mode->clock);
2157                 b.full = dfixed_div(b, a);
2158                 c.full = dfixed_const(latency_watermark_a);
2159                 c.full = dfixed_mul(c, b);
2160                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2161                 c.full = dfixed_div(c, a);
2162                 a.full = dfixed_const(16);
2163                 c.full = dfixed_div(c, a);
2164                 priority_a_mark = dfixed_trunc(c);
2165                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2166
2167                 a.full = dfixed_const(1000);
2168                 b.full = dfixed_const(mode->clock);
2169                 b.full = dfixed_div(b, a);
2170                 c.full = dfixed_const(latency_watermark_b);
2171                 c.full = dfixed_mul(c, b);
2172                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2173                 c.full = dfixed_div(c, a);
2174                 a.full = dfixed_const(16);
2175                 c.full = dfixed_div(c, a);
2176                 priority_b_mark = dfixed_trunc(c);
2177                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2178         }
2179
2180         /* select wm A */
2181         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2182         tmp = arb_control3;
2183         tmp &= ~LATENCY_WATERMARK_MASK(3);
2184         tmp |= LATENCY_WATERMARK_MASK(1);
2185         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2186         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2187                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2188                 LATENCY_HIGH_WATERMARK(line_time)));
2189         /* select wm B */
2190         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2191         tmp &= ~LATENCY_WATERMARK_MASK(3);
2192         tmp |= LATENCY_WATERMARK_MASK(2);
2193         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2194         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2195                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2196                 LATENCY_HIGH_WATERMARK(line_time)));
2197         /* restore original selection */
2198         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2199
2200         /* write the priority marks */
2201         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2202         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2203
2204         /* save values for DPM */
2205         radeon_crtc->line_time = line_time;
2206         radeon_crtc->wm_high = latency_watermark_a;
2207         radeon_crtc->wm_low = latency_watermark_b;
2208 }
2209
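     /*
      * dce6_bandwidth_update() is the DCE6 bandwidth_update hook (wired up
      * through the ASIC display function table in radeon_asic.c).  Crtcs are
      * walked in pairs because each line buffer is shared by two display
      * controllers, so the line buffer split and the watermarks for crtc i
      * and crtc i + 1 have to be programmed together.
      */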
2210 void dce6_bandwidth_update(struct radeon_device *rdev)
2211 {
2212         struct drm_display_mode *mode0 = NULL;
2213         struct drm_display_mode *mode1 = NULL;
2214         u32 num_heads = 0, lb_size;
2215         int i;
2216
2217         radeon_update_display_priority(rdev);
2218
2219         for (i = 0; i < rdev->num_crtc; i++) {
2220                 if (rdev->mode_info.crtcs[i]->base.enabled)
2221                         num_heads++;
2222         }
2223         for (i = 0; i < rdev->num_crtc; i += 2) {
2224                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2225                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2226                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2227                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2228                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2229                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2230         }
2231 }
2232
2233 /*
2234  * Core functions
2235  */
2236 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2237 {
2238         const u32 num_tile_mode_states = 32;
2239         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2240
2241         switch (rdev->config.si.mem_row_size_in_kb) {
2242         case 1:
2243                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2244                 break;
2245         case 2:
2246         default:
2247                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2248                 break;
2249         case 4:
2250                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2251                 break;
2252         }
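             /* split_equal_to_row_size now holds the ADDR_SURF_TILE_SPLIT_*
              * value matching the memory row size; it is used below for the
              * tile modes whose split is defined as "equal to row size" (the
              * uncompressed depth/stencil modes and the 64 bpp thin mode).
              */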
2253
2254         if ((rdev->family == CHIP_TAHITI) ||
2255             (rdev->family == CHIP_PITCAIRN)) {
2256                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2257                         switch (reg_offset) {
2258                         case 0:  /* non-AA compressed depth or any compressed stencil */
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2264                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2267                                 break;
2268                         case 1:  /* 2xAA/4xAA compressed depth only */
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2273                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2274                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277                                 break;
2278                         case 2:  /* 8xAA compressed depth only */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287                                 break;
2288                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297                                 break;
2298                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2299                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2300                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2304                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307                                 break;
2308                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2309                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312                                                  TILE_SPLIT(split_equal_to_row_size) |
2313                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2314                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317                                 break;
2318                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2319                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322                                                  TILE_SPLIT(split_equal_to_row_size) |
2323                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2324                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2327                                 break;
2328                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332                                                  TILE_SPLIT(split_equal_to_row_size) |
2333                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2334                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337                                 break;
2338                         case 8:  /* 1D and 1D Array Surfaces */
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2343                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2344                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2347                                 break;
2348                         case 9:  /* Displayable maps. */
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2354                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357                                 break;
2358                         case 10:  /* Display 8bpp. */
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2364                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367                                 break;
2368                         case 11:  /* Display 16bpp. */
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2374                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377                                 break;
2378                         case 12:  /* Display 32bpp. */
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2384                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2387                                 break;
2388                         case 13:  /* Thin. */
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2393                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2394                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397                                 break;
2398                         case 14:  /* Thin 8 bpp. */
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2404                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407                                 break;
2408                         case 15:  /* Thin 16 bpp. */
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2414                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2417                                 break;
2418                         case 16:  /* Thin 32 bpp. */
2419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2423                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2424                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427                                 break;
2428                         case 17:  /* Thin 64 bpp. */
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432                                                  TILE_SPLIT(split_equal_to_row_size) |
2433                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2434                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437                                 break;
2438                         case 21:  /* 8 bpp PRT. */
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2444                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447                                 break;
2448                         case 22:  /* 16 bpp PRT */
2449                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2454                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2457                                 break;
2458                         case 23:  /* 32 bpp PRT */
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2464                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467                                 break;
2468                         case 24:  /* 64 bpp PRT */
2469                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2473                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2474                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477                                 break;
2478                         case 25:  /* 128 bpp PRT */
2479                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2483                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2484                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2487                                 break;
2488                         default:
2489                                 gb_tile_moden = 0;
2490                                 break;
2491                         }
2492                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2493                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2494                 }
2495         } else if ((rdev->family == CHIP_VERDE) ||
2496                    (rdev->family == CHIP_OLAND) ||
2497                    (rdev->family == CHIP_HAINAN)) {
2498                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2499                         switch (reg_offset) {
2500                         case 0:  /* non-AA compressed depth or any compressed stencil */
2501                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2504                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2506                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2509                                 break;
2510                         case 1:  /* 2xAA/4xAA compressed depth only */
2511                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2514                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2516                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2519                                 break;
2520                         case 2:  /* 8xAA compressed depth only */
2521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2525                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2526                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529                                 break;
2530                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2536                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539                                 break;
2540                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2546                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549                                 break;
2550                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554                                                  TILE_SPLIT(split_equal_to_row_size) |
2555                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2556                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559                                 break;
2560                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564                                                  TILE_SPLIT(split_equal_to_row_size) |
2565                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2566                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569                                 break;
2570                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574                                                  TILE_SPLIT(split_equal_to_row_size) |
2575                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2576                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2579                                 break;
2580                         case 8:  /* 1D and 1D Array Surfaces */
2581                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2582                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2585                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2586                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589                                 break;
2590                         case 9:  /* Displayable maps. */
2591                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2596                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599                                 break;
2600                         case 10:  /* Display 8bpp. */
2601                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2606                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2609                                 break;
2610                         case 11:  /* Display 16bpp. */
2611                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2616                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619                                 break;
2620                         case 12:  /* Display 32bpp. */
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2626                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629                                 break;
2630                         case 13:  /* Thin. */
2631                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2632                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2636                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639                                 break;
2640                         case 14:  /* Thin 8 bpp. */
2641                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2646                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649                                 break;
2650                         case 15:  /* Thin 16 bpp. */
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2656                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659                                 break;
2660                         case 16:  /* Thin 32 bpp. */
2661                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2665                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2666                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669                                 break;
2670                         case 17:  /* Thin 64 bpp. */
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674                                                  TILE_SPLIT(split_equal_to_row_size) |
2675                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2676                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679                                 break;
2680                         case 21:  /* 8 bpp PRT. */
2681                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2685                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2686                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689                                 break;
2690                         case 22:  /* 16 bpp PRT */
2691                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2696                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699                                 break;
2700                         case 23:  /* 32 bpp PRT */
2701                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2706                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709                                 break;
2710                         case 24:  /* 64 bpp PRT */
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2715                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2716                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2718                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719                                 break;
2720                         case 25:  /* 128 bpp PRT */
2721                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2725                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2726                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2729                                 break;
2730                         default:
2731                                 gb_tile_moden = 0;
2732                                 break;
2733                         }
2734                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2735                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2736                 }
2737         } else
2738                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2739 }
2740
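     /*
      * si_select_se_sh - program GRBM_GFX_INDEX so that subsequent register
      * accesses target the given shader engine (se_num) and shader array
      * (sh_num); passing 0xffffffff for either index selects broadcast writes.
      */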
2741 static void si_select_se_sh(struct radeon_device *rdev,
2742                             u32 se_num, u32 sh_num)
2743 {
2744         u32 data = INSTANCE_BROADCAST_WRITES;
2745
2746         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2747                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2748         else if (se_num == 0xffffffff)
2749                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2750         else if (sh_num == 0xffffffff)
2751                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2752         else
2753                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2754         WREG32(GRBM_GFX_INDEX, data);
2755 }
2756
2757 static u32 si_create_bitmask(u32 bit_width)
2758 {
2759         u32 i, mask = 0;
2760
2761         for (i = 0; i < bit_width; i++) {
2762                 mask <<= 1;
2763                 mask |= 1;
2764         }
2765         return mask;
2766 }
2767
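     /*
      * si_get_cu_enabled - return a bitmask of the compute units that are
      * usable in the currently selected shader array, i.e. not fused off in
      * CC_GC_SHADER_ARRAY_CONFIG and not disabled in
      * GC_USER_SHADER_ARRAY_CONFIG, limited to cu_per_sh bits.
      */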
2768 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2769 {
2770         u32 data, mask;
2771
2772         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2773         if (data & 1)
2774                 data &= INACTIVE_CUS_MASK;
2775         else
2776                 data = 0;
2777         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2778
2779         data >>= INACTIVE_CUS_SHIFT;
2780
2781         mask = si_create_bitmask(cu_per_sh);
2782
2783         return ~data & mask;
2784 }
2785
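     /*
      * si_setup_spi - for every SE/SH pair, clear the lowest active CU bit
      * from SPI_STATIC_THREAD_MGMT_3 (removing that CU from the static
      * thread management mask), then return GRBM_GFX_INDEX to broadcast mode.
      */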
2786 static void si_setup_spi(struct radeon_device *rdev,
2787                          u32 se_num, u32 sh_per_se,
2788                          u32 cu_per_sh)
2789 {
2790         int i, j, k;
2791         u32 data, mask, active_cu;
2792
2793         for (i = 0; i < se_num; i++) {
2794                 for (j = 0; j < sh_per_se; j++) {
2795                         si_select_se_sh(rdev, i, j);
2796                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2797                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2798
2799                         mask = 1;
2800                         for (k = 0; k < 16; k++) {
2801                                 mask = 1 << k;  /* walk the CU bits one at a time */
2802                                 if (active_cu & mask) {
2803                                         data &= ~mask;
2804                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2805                                         break;
2806                                 }
2807                         }
2808                 }
2809         }
2810         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2811 }
2812
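     /*
      * si_get_rb_disabled - return the mask of render backends in the
      * current shader array that are unusable, combining the fused-off bits
      * in CC_RB_BACKEND_DISABLE with the user-disabled bits in
      * GC_USER_RB_BACKEND_DISABLE.
      */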
2813 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2814                               u32 max_rb_num_per_se,
2815                               u32 sh_per_se)
2816 {
2817         u32 data, mask;
2818
2819         data = RREG32(CC_RB_BACKEND_DISABLE);
2820         if (data & 1)
2821                 data &= BACKEND_DISABLE_MASK;
2822         else
2823                 data = 0;
2824         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2825
2826         data >>= BACKEND_DISABLE_SHIFT;
2827
2828         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2829
2830         return data & mask;
2831 }
2832
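     /*
      * si_setup_rb - collect the disabled-RB bits from every SE/SH, store
      * the resulting enabled-RB mask in rdev->config.si.backend_enable_mask
      * and program PA_SC_RASTER_CONFIG per shader engine so raster output
      * only maps to the render backends that are actually present.
      */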
2833 static void si_setup_rb(struct radeon_device *rdev,
2834                         u32 se_num, u32 sh_per_se,
2835                         u32 max_rb_num_per_se)
2836 {
2837         int i, j;
2838         u32 data, mask;
2839         u32 disabled_rbs = 0;
2840         u32 enabled_rbs = 0;
2841
2842         for (i = 0; i < se_num; i++) {
2843                 for (j = 0; j < sh_per_se; j++) {
2844                         si_select_se_sh(rdev, i, j);
2845                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
2846                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2847                 }
2848         }
2849         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2850
2851         mask = 1;
2852         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2853                 if (!(disabled_rbs & mask))
2854                         enabled_rbs |= mask;
2855                 mask <<= 1;
2856         }
2857
2858         rdev->config.si.backend_enable_mask = enabled_rbs;
2859
2860         for (i = 0; i < se_num; i++) {
2861                 si_select_se_sh(rdev, i, 0xffffffff);
2862                 data = 0;
2863                 for (j = 0; j < sh_per_se; j++) {
2864                         switch (enabled_rbs & 3) {
2865                         case 1:
2866                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2867                                 break;
2868                         case 2:
2869                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2870                                 break;
2871                         case 3:
2872                         default:
2873                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2874                                 break;
2875                         }
2876                         enabled_rbs >>= 2;
2877                 }
2878                 WREG32(PA_SC_RASTER_CONFIG, data);
2879         }
2880         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2881 }
2882
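     /*
      * si_gpu_init - one-time setup of the graphics block: fill in the
      * per-family limits in rdev->config.si, derive the row-size and tiling
      * fields of GB_ADDR_CONFIG from the memory controller configuration,
      * load the tiling mode table, configure the RBs and SPI, and program
      * the hardware defaults for the 3D engine.
      */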
2883 static void si_gpu_init(struct radeon_device *rdev)
2884 {
2885         u32 gb_addr_config = 0;
2886         u32 mc_shared_chmap, mc_arb_ramcfg;
2887         u32 sx_debug_1;
2888         u32 hdp_host_path_cntl;
2889         u32 tmp;
2890         int i, j;
2891
2892         switch (rdev->family) {
2893         case CHIP_TAHITI:
2894                 rdev->config.si.max_shader_engines = 2;
2895                 rdev->config.si.max_tile_pipes = 12;
2896                 rdev->config.si.max_cu_per_sh = 8;
2897                 rdev->config.si.max_sh_per_se = 2;
2898                 rdev->config.si.max_backends_per_se = 4;
2899                 rdev->config.si.max_texture_channel_caches = 12;
2900                 rdev->config.si.max_gprs = 256;
2901                 rdev->config.si.max_gs_threads = 32;
2902                 rdev->config.si.max_hw_contexts = 8;
2903
2904                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2905                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2906                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2907                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2908                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2909                 break;
2910         case CHIP_PITCAIRN:
2911                 rdev->config.si.max_shader_engines = 2;
2912                 rdev->config.si.max_tile_pipes = 8;
2913                 rdev->config.si.max_cu_per_sh = 5;
2914                 rdev->config.si.max_sh_per_se = 2;
2915                 rdev->config.si.max_backends_per_se = 4;
2916                 rdev->config.si.max_texture_channel_caches = 8;
2917                 rdev->config.si.max_gprs = 256;
2918                 rdev->config.si.max_gs_threads = 32;
2919                 rdev->config.si.max_hw_contexts = 8;
2920
2921                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2922                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2923                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2924                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2925                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2926                 break;
2927         case CHIP_VERDE:
2928         default:
2929                 rdev->config.si.max_shader_engines = 1;
2930                 rdev->config.si.max_tile_pipes = 4;
2931                 rdev->config.si.max_cu_per_sh = 5;
2932                 rdev->config.si.max_sh_per_se = 2;
2933                 rdev->config.si.max_backends_per_se = 4;
2934                 rdev->config.si.max_texture_channel_caches = 4;
2935                 rdev->config.si.max_gprs = 256;
2936                 rdev->config.si.max_gs_threads = 32;
2937                 rdev->config.si.max_hw_contexts = 8;
2938
2939                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2940                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2941                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2942                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2943                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2944                 break;
2945         case CHIP_OLAND:
2946                 rdev->config.si.max_shader_engines = 1;
2947                 rdev->config.si.max_tile_pipes = 4;
2948                 rdev->config.si.max_cu_per_sh = 6;
2949                 rdev->config.si.max_sh_per_se = 1;
2950                 rdev->config.si.max_backends_per_se = 2;
2951                 rdev->config.si.max_texture_channel_caches = 4;
2952                 rdev->config.si.max_gprs = 256;
2953                 rdev->config.si.max_gs_threads = 16;
2954                 rdev->config.si.max_hw_contexts = 8;
2955
2956                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2957                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2958                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2959                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2960                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2961                 break;
2962         case CHIP_HAINAN:
2963                 rdev->config.si.max_shader_engines = 1;
2964                 rdev->config.si.max_tile_pipes = 4;
2965                 rdev->config.si.max_cu_per_sh = 5;
2966                 rdev->config.si.max_sh_per_se = 1;
2967                 rdev->config.si.max_backends_per_se = 1;
2968                 rdev->config.si.max_texture_channel_caches = 2;
2969                 rdev->config.si.max_gprs = 256;
2970                 rdev->config.si.max_gs_threads = 16;
2971                 rdev->config.si.max_hw_contexts = 8;
2972
2973                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2974                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2975                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2976                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2977                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2978                 break;
2979         }
2980
2981         /* Initialize HDP */
2982         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2983                 WREG32((0x2c14 + j), 0x00000000);
2984                 WREG32((0x2c18 + j), 0x00000000);
2985                 WREG32((0x2c1c + j), 0x00000000);
2986                 WREG32((0x2c20 + j), 0x00000000);
2987                 WREG32((0x2c24 + j), 0x00000000);
2988         }
2989
2990         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2991
2992         evergreen_fix_pci_max_read_req_size(rdev);
2993
2994         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2995
2996         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2997         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2998
2999         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3000         rdev->config.si.mem_max_burst_length_bytes = 256;
3001         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3002         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3003         if (rdev->config.si.mem_row_size_in_kb > 4)
3004                 rdev->config.si.mem_row_size_in_kb = 4;
3005         /* XXX use MC settings? */
3006         rdev->config.si.shader_engine_tile_size = 32;
3007         rdev->config.si.num_gpus = 1;
3008         rdev->config.si.multi_gpu_tile_size = 64;
3009
3010         /* fix up row size */
3011         gb_addr_config &= ~ROW_SIZE_MASK;
3012         switch (rdev->config.si.mem_row_size_in_kb) {
3013         case 1:
3014         default:
3015                 gb_addr_config |= ROW_SIZE(0);
3016                 break;
3017         case 2:
3018                 gb_addr_config |= ROW_SIZE(1);
3019                 break;
3020         case 4:
3021                 gb_addr_config |= ROW_SIZE(2);
3022                 break;
3023         }
3024
3025         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3026          * not have bank info, so create a custom tiling dword.
3027          * bits 3:0   num_pipes
3028          * bits 7:4   num_banks
3029          * bits 11:8  group_size
3030          * bits 15:12 row_size
3031          */
3032         rdev->config.si.tile_config = 0;
3033         switch (rdev->config.si.num_tile_pipes) {
3034         case 1:
3035                 rdev->config.si.tile_config |= (0 << 0);
3036                 break;
3037         case 2:
3038                 rdev->config.si.tile_config |= (1 << 0);
3039                 break;
3040         case 4:
3041                 rdev->config.si.tile_config |= (2 << 0);
3042                 break;
3043         case 8:
3044         default:
3045                 /* XXX what about 12? */
3046                 rdev->config.si.tile_config |= (3 << 0);
3047                 break;
3048         }
3049         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3050         case 0: /* four banks */
3051                 rdev->config.si.tile_config |= 0 << 4;
3052                 break;
3053         case 1: /* eight banks */
3054                 rdev->config.si.tile_config |= 1 << 4;
3055                 break;
3056         case 2: /* sixteen banks */
3057         default:
3058                 rdev->config.si.tile_config |= 2 << 4;
3059                 break;
3060         }
3061         rdev->config.si.tile_config |=
3062                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3063         rdev->config.si.tile_config |=
3064                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3065
3066         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3067         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3068         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3069         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3070         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3071         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3072         if (rdev->has_uvd) {
3073                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3074                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3075                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3076         }
3077
3078         si_tiling_mode_table_init(rdev);
3079
3080         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3081                     rdev->config.si.max_sh_per_se,
3082                     rdev->config.si.max_backends_per_se);
3083
3084         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3085                      rdev->config.si.max_sh_per_se,
3086                      rdev->config.si.max_cu_per_sh);
3087
3088
3089         /* set HW defaults for 3D engine */
3090         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3091                                      ROQ_IB2_START(0x2b)));
3092         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3093
3094         sx_debug_1 = RREG32(SX_DEBUG_1);
3095         WREG32(SX_DEBUG_1, sx_debug_1);
3096
3097         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3098
3099         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3100                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3101                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3102                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3103
3104         WREG32(VGT_NUM_INSTANCES, 1);
3105
3106         WREG32(CP_PERFMON_CNTL, 0);
3107
3108         WREG32(SQ_CONFIG, 0);
3109
3110         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3111                                           FORCE_EOV_MAX_REZ_CNT(255)));
3112
3113         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3114                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3115
3116         WREG32(VGT_GS_VERTEX_REUSE, 16);
3117         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3118
3119         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3120         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3121         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3122         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3123         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3124         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3125         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3126         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3127
3128         tmp = RREG32(HDP_MISC_CNTL);
3129         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3130         WREG32(HDP_MISC_CNTL, tmp);
3131
3132         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3133         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3134
3135         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3136
3137         udelay(50);
3138 }
3139
3140 /*
3141  * GPU scratch register helpers.
3142  */
3143 static void si_scratch_init(struct radeon_device *rdev)
3144 {
3145         int i;
3146
3147         rdev->scratch.num_reg = 7;
3148         rdev->scratch.reg_base = SCRATCH_REG0;
3149         for (i = 0; i < rdev->scratch.num_reg; i++) {
3150                 rdev->scratch.free[i] = true;
3151                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3152         }
3153 }
3154
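     /*
      * si_fence_ring_emit - emit a fence on the requested ring: flush the
      * TC/L1/K-cache/I-cache over the GART with a SURFACE_SYNC, then emit an
      * EVENT_WRITE_EOP that writes the fence sequence number and raises an
      * interrupt once the preceding work has retired.
      */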
3155 void si_fence_ring_emit(struct radeon_device *rdev,
3156                         struct radeon_fence *fence)
3157 {
3158         struct radeon_ring *ring = &rdev->ring[fence->ring];
3159         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3160
3161         /* flush read cache over gart */
3162         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3163         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3164         radeon_ring_write(ring, 0);
3165         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3166         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3167                           PACKET3_TC_ACTION_ENA |
3168                           PACKET3_SH_KCACHE_ACTION_ENA |
3169                           PACKET3_SH_ICACHE_ACTION_ENA);
3170         radeon_ring_write(ring, 0xFFFFFFFF);
3171         radeon_ring_write(ring, 0);
3172         radeon_ring_write(ring, 10); /* poll interval */
3173         /* EVENT_WRITE_EOP - flush caches, send int */
3174         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3175         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3176         radeon_ring_write(ring, addr & 0xffffffff);
3177         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3178         radeon_ring_write(ring, fence->seq);
3179         radeon_ring_write(ring, 0);
3180 }
3181
3182 /*
3183  * Indirect buffer (IB) handling
3184  */
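     /*
      * si_ring_ib_execute - schedule an indirect buffer on a gfx/compute
      * ring.  Const IBs are preceded by a SWITCH_BUFFER packet; normal IBs
      * first record the expected read pointer (via the rptr save register or
      * writeback memory) and flush the read caches for the IB's VM id after
      * the INDIRECT_BUFFER packet.
      */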
3185 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3186 {
3187         struct radeon_ring *ring = &rdev->ring[ib->ring];
3188         u32 header;
3189
3190         if (ib->is_const_ib) {
3191                 /* set switch buffer packet before const IB */
3192                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3193                 radeon_ring_write(ring, 0);
3194
3195                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3196         } else {
3197                 u32 next_rptr;
3198                 if (ring->rptr_save_reg) {
3199                         next_rptr = ring->wptr + 3 + 4 + 8;
3200                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3201                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3202                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3203                         radeon_ring_write(ring, next_rptr);
3204                 } else if (rdev->wb.enabled) {
3205                         next_rptr = ring->wptr + 5 + 4 + 8;
3206                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3207                         radeon_ring_write(ring, (1 << 8));
3208                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3209                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3210                         radeon_ring_write(ring, next_rptr);
3211                 }
3212
3213                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3214         }
3215
3216         radeon_ring_write(ring, header);
3217         radeon_ring_write(ring,
3218 #ifdef __BIG_ENDIAN
3219                           (2 << 0) |
3220 #endif
3221                           (ib->gpu_addr & 0xFFFFFFFC));
3222         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3223         radeon_ring_write(ring, ib->length_dw |
3224                           (ib->vm ? (ib->vm->id << 24) : 0));
3225
3226         if (!ib->is_const_ib) {
3227                 /* flush read cache over gart for this vmid */
3228                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3229                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3230                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3231                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3232                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3233                                   PACKET3_TC_ACTION_ENA |
3234                                   PACKET3_SH_KCACHE_ACTION_ENA |
3235                                   PACKET3_SH_ICACHE_ACTION_ENA);
3236                 radeon_ring_write(ring, 0xFFFFFFFF);
3237                 radeon_ring_write(ring, 0);
3238                 radeon_ring_write(ring, 10); /* poll interval */
3239         }
3240 }
3241
3242 /*
3243  * CP (command processor).
3244  */
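     /*
      * si_cp_enable - un-halt or halt the CP micro engines (ME/PFP/CE);
      * when halting, also clear SCRATCH_UMSK and mark the gfx ring and both
      * compute rings as not ready.
      */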
3245 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3246 {
3247         if (enable)
3248                 WREG32(CP_ME_CNTL, 0);
3249         else {
3250                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3251                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3252                 WREG32(SCRATCH_UMSK, 0);
3253                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3254                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3255                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3256         }
3257         udelay(50);
3258 }
3259
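     /*
      * si_cp_load_microcode - halt the CP and stream the PFP, CE and ME
      * firmware images (big-endian dwords) into their ucode memories.
      */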
3260 static int si_cp_load_microcode(struct radeon_device *rdev)
3261 {
3262         const __be32 *fw_data;
3263         int i;
3264
3265         if (!rdev->me_fw || !rdev->pfp_fw)
3266                 return -EINVAL;
3267
3268         si_cp_enable(rdev, false);
3269
3270         /* PFP */
3271         fw_data = (const __be32 *)rdev->pfp_fw->data;
3272         WREG32(CP_PFP_UCODE_ADDR, 0);
3273         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3274                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3275         WREG32(CP_PFP_UCODE_ADDR, 0);
3276
3277         /* CE */
3278         fw_data = (const __be32 *)rdev->ce_fw->data;
3279         WREG32(CP_CE_UCODE_ADDR, 0);
3280         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3281                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3282         WREG32(CP_CE_UCODE_ADDR, 0);
3283
3284         /* ME */
3285         fw_data = (const __be32 *)rdev->me_fw->data;
3286         WREG32(CP_ME_RAM_WADDR, 0);
3287         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3288                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3289         WREG32(CP_ME_RAM_WADDR, 0);
3290
3291         WREG32(CP_PFP_UCODE_ADDR, 0);
3292         WREG32(CP_CE_UCODE_ADDR, 0);
3293         WREG32(CP_ME_RAM_WADDR, 0);
3294         WREG32(CP_ME_RAM_RADDR, 0);
3295         return 0;
3296 }
3297
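     /*
      * si_cp_start - run ME_INITIALIZE, set up the CE partitions, emit the
      * golden clear state on the gfx ring and clear the compute context
      * state on both compute rings.
      */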
3298 static int si_cp_start(struct radeon_device *rdev)
3299 {
3300         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3301         int r, i;
3302
3303         r = radeon_ring_lock(rdev, ring, 7 + 4);
3304         if (r) {
3305                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3306                 return r;
3307         }
3308         /* init the CP */
3309         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3310         radeon_ring_write(ring, 0x1);
3311         radeon_ring_write(ring, 0x0);
3312         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3313         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3314         radeon_ring_write(ring, 0);
3315         radeon_ring_write(ring, 0);
3316
3317         /* init the CE partitions */
3318         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3319         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3320         radeon_ring_write(ring, 0xc000);
3321         radeon_ring_write(ring, 0xe000);
3322         radeon_ring_unlock_commit(rdev, ring);
3323
3324         si_cp_enable(rdev, true);
3325
3326         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3327         if (r) {
3328                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3329                 return r;
3330         }
3331
3332         /* setup clear context state */
3333         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3334         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3335
3336         for (i = 0; i < si_default_size; i++)
3337                 radeon_ring_write(ring, si_default_state[i]);
3338
3339         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3340         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3341
3342         /* set clear context state */
3343         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3344         radeon_ring_write(ring, 0);
3345
3346         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3347         radeon_ring_write(ring, 0x00000316);
3348         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3349         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3350
3351         radeon_ring_unlock_commit(rdev, ring);
3352
3353         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3354                 ring = &rdev->ring[i];
3355                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3356
3357                 /* clear the compute context state */
3358                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3359                 radeon_ring_write(ring, 0);
3360
3361                 radeon_ring_unlock_commit(rdev, ring);
3362         }
3363
3364         return 0;
3365 }
3366
3367 static void si_cp_fini(struct radeon_device *rdev)
3368 {
3369         struct radeon_ring *ring;
3370         si_cp_enable(rdev, false);
3371
3372         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3373         radeon_ring_fini(rdev, ring);
3374         radeon_scratch_free(rdev, ring->rptr_save_reg);
3375
3376         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3377         radeon_ring_fini(rdev, ring);
3378         radeon_scratch_free(rdev, ring->rptr_save_reg);
3379
3380         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3381         radeon_ring_fini(rdev, ring);
3382         radeon_scratch_free(rdev, ring->rptr_save_reg);
3383 }
3384
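     /*
      * si_cp_resume - program the ring buffer registers (size, rptr/wptr,
      * writeback address, base) for the gfx ring and the two compute rings,
      * start the CP via si_cp_start() and ring-test each ring before
      * marking it ready.
      */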
3385 static int si_cp_resume(struct radeon_device *rdev)
3386 {
3387         struct radeon_ring *ring;
3388         u32 tmp;
3389         u32 rb_bufsz;
3390         int r;
3391
3392         si_enable_gui_idle_interrupt(rdev, false);
3393
3394         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3395         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3396
3397         /* Set the write pointer delay */
3398         WREG32(CP_RB_WPTR_DELAY, 0);
3399
3400         WREG32(CP_DEBUG, 0);
3401         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3402
3403         /* ring 0 - compute and gfx */
3404         /* Set ring buffer size */
3405         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3406         rb_bufsz = order_base_2(ring->ring_size / 8);
3407         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3408 #ifdef __BIG_ENDIAN
3409         tmp |= BUF_SWAP_32BIT;
3410 #endif
3411         WREG32(CP_RB0_CNTL, tmp);
3412
3413         /* Initialize the ring buffer's read and write pointers */
3414         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3415         ring->wptr = 0;
3416         WREG32(CP_RB0_WPTR, ring->wptr);
3417
3418         /* set the wb address whether it's enabled or not */
3419         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3420         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3421
3422         if (rdev->wb.enabled)
3423                 WREG32(SCRATCH_UMSK, 0xff);
3424         else {
3425                 tmp |= RB_NO_UPDATE;
3426                 WREG32(SCRATCH_UMSK, 0);
3427         }
3428
3429         mdelay(1);
3430         WREG32(CP_RB0_CNTL, tmp);
3431
3432         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3433
3434         ring->rptr = RREG32(CP_RB0_RPTR);
3435
3436         /* ring1  - compute only */
3437         /* Set ring buffer size */
3438         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3439         rb_bufsz = order_base_2(ring->ring_size / 8);
3440         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3441 #ifdef __BIG_ENDIAN
3442         tmp |= BUF_SWAP_32BIT;
3443 #endif
3444         WREG32(CP_RB1_CNTL, tmp);
3445
3446         /* Initialize the ring buffer's read and write pointers */
3447         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3448         ring->wptr = 0;
3449         WREG32(CP_RB1_WPTR, ring->wptr);
3450
3451         /* set the wb address whether it's enabled or not */
3452         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3453         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3454
3455         mdelay(1);
3456         WREG32(CP_RB1_CNTL, tmp);
3457
3458         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3459
3460         ring->rptr = RREG32(CP_RB1_RPTR);
3461
3462         /* ring2 - compute only */
3463         /* Set ring buffer size */
3464         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3465         rb_bufsz = order_base_2(ring->ring_size / 8);
3466         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3467 #ifdef __BIG_ENDIAN
3468         tmp |= BUF_SWAP_32BIT;
3469 #endif
3470         WREG32(CP_RB2_CNTL, tmp);
3471
3472         /* Initialize the ring buffer's read and write pointers */
3473         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3474         ring->wptr = 0;
3475         WREG32(CP_RB2_WPTR, ring->wptr);
3476
3477         /* set the wb address whether it's enabled or not */
3478         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3479         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3480
3481         mdelay(1);
3482         WREG32(CP_RB2_CNTL, tmp);
3483
3484         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3485
3486         ring->rptr = RREG32(CP_RB2_RPTR);
3487
3488         /* start the rings */
3489         si_cp_start(rdev);
3490         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3491         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3492         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3493         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3494         if (r) {
3495                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3496                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3497                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3498                 return r;
3499         }
3500         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3501         if (r) {
3502                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3503         }
3504         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3505         if (r) {
3506                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3507         }
3508
3509         si_enable_gui_idle_interrupt(rdev, true);
3510
3511         return 0;
3512 }
3513
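     /*
      * si_gpu_check_soft_reset - inspect the GRBM, SRBM, DMA and VM L2
      * status registers and return a mask of RADEON_RESET_* flags for the
      * blocks that look hung.  A busy MC is deliberately ignored since it is
      * most likely just busy rather than hung.
      */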
3514 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3515 {
3516         u32 reset_mask = 0;
3517         u32 tmp;
3518
3519         /* GRBM_STATUS */
3520         tmp = RREG32(GRBM_STATUS);
3521         if (tmp & (PA_BUSY | SC_BUSY |
3522                    BCI_BUSY | SX_BUSY |
3523                    TA_BUSY | VGT_BUSY |
3524                    DB_BUSY | CB_BUSY |
3525                    GDS_BUSY | SPI_BUSY |
3526                    IA_BUSY | IA_BUSY_NO_DMA))
3527                 reset_mask |= RADEON_RESET_GFX;
3528
3529         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3530                    CP_BUSY | CP_COHERENCY_BUSY))
3531                 reset_mask |= RADEON_RESET_CP;
3532
3533         if (tmp & GRBM_EE_BUSY)
3534                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3535
3536         /* GRBM_STATUS2 */
3537         tmp = RREG32(GRBM_STATUS2);
3538         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3539                 reset_mask |= RADEON_RESET_RLC;
3540
3541         /* DMA_STATUS_REG 0 */
3542         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3543         if (!(tmp & DMA_IDLE))
3544                 reset_mask |= RADEON_RESET_DMA;
3545
3546         /* DMA_STATUS_REG 1 */
3547         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3548         if (!(tmp & DMA_IDLE))
3549                 reset_mask |= RADEON_RESET_DMA1;
3550
3551         /* SRBM_STATUS2 */
3552         tmp = RREG32(SRBM_STATUS2);
3553         if (tmp & DMA_BUSY)
3554                 reset_mask |= RADEON_RESET_DMA;
3555
3556         if (tmp & DMA1_BUSY)
3557                 reset_mask |= RADEON_RESET_DMA1;
3558
3559         /* SRBM_STATUS */
3560         tmp = RREG32(SRBM_STATUS);
3561
3562         if (tmp & IH_BUSY)
3563                 reset_mask |= RADEON_RESET_IH;
3564
3565         if (tmp & SEM_BUSY)
3566                 reset_mask |= RADEON_RESET_SEM;
3567
3568         if (tmp & GRBM_RQ_PENDING)
3569                 reset_mask |= RADEON_RESET_GRBM;
3570
3571         if (tmp & VMC_BUSY)
3572                 reset_mask |= RADEON_RESET_VMC;
3573
3574         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3575                    MCC_BUSY | MCD_BUSY))
3576                 reset_mask |= RADEON_RESET_MC;
3577
3578         if (evergreen_is_display_hung(rdev))
3579                 reset_mask |= RADEON_RESET_DISPLAY;
3580
3581         /* VM_L2_STATUS */
3582         tmp = RREG32(VM_L2_STATUS);
3583         if (tmp & L2_BUSY)
3584                 reset_mask |= RADEON_RESET_VMC;
3585
3586         /* Skip MC reset as it's most likely not hung, just busy */
3587         if (reset_mask & RADEON_RESET_MC) {
3588                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3589                 reset_mask &= ~RADEON_RESET_MC;
3590         }
3591
3592         return reset_mask;
3593 }
3594
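/**
 * si_gpu_soft_reset - soft reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Disable PG/CG, halt the RLC, CP and (if requested) the DMA ring
 * buffers, stop the MC, then pulse the GRBM/SRBM soft reset bits
 * matching @reset_mask and resume the MC (SI).
 */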
3595 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3596 {
3597         struct evergreen_mc_save save;
3598         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3599         u32 tmp;
3600
3601         if (reset_mask == 0)
3602                 return;
3603
3604         dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask);
3605
3606         evergreen_print_gpu_status_regs(rdev);
3607         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3608                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3609         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3610                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3611
3612         /* disable PG/CG */
3613         si_fini_pg(rdev);
3614         si_fini_cg(rdev);
3615
3616         /* stop the rlc */
3617         si_rlc_stop(rdev);
3618
3619         /* Disable CP parsing/prefetching */
3620         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3621
3622         if (reset_mask & RADEON_RESET_DMA) {
3623                 /* dma0 */
3624                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3625                 tmp &= ~DMA_RB_ENABLE;
3626                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3627         }
3628         if (reset_mask & RADEON_RESET_DMA1) {
3629                 /* dma1 */
3630                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3631                 tmp &= ~DMA_RB_ENABLE;
3632                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3633         }
3634
3635         udelay(50);
3636
3637         evergreen_mc_stop(rdev, &save);
3638         if (evergreen_mc_wait_for_idle(rdev)) {
3639                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3640         }
3641
3642         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3643                 grbm_soft_reset = SOFT_RESET_CB |
3644                         SOFT_RESET_DB |
3645                         SOFT_RESET_GDS |
3646                         SOFT_RESET_PA |
3647                         SOFT_RESET_SC |
3648                         SOFT_RESET_BCI |
3649                         SOFT_RESET_SPI |
3650                         SOFT_RESET_SX |
3651                         SOFT_RESET_TC |
3652                         SOFT_RESET_TA |
3653                         SOFT_RESET_VGT |
3654                         SOFT_RESET_IA;
3655         }
3656
3657         if (reset_mask & RADEON_RESET_CP) {
3658                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3659
3660                 srbm_soft_reset |= SOFT_RESET_GRBM;
3661         }
3662
3663         if (reset_mask & RADEON_RESET_DMA)
3664                 srbm_soft_reset |= SOFT_RESET_DMA;
3665
3666         if (reset_mask & RADEON_RESET_DMA1)
3667                 srbm_soft_reset |= SOFT_RESET_DMA1;
3668
3669         if (reset_mask & RADEON_RESET_DISPLAY)
3670                 srbm_soft_reset |= SOFT_RESET_DC;
3671
3672         if (reset_mask & RADEON_RESET_RLC)
3673                 grbm_soft_reset |= SOFT_RESET_RLC;
3674
3675         if (reset_mask & RADEON_RESET_SEM)
3676                 srbm_soft_reset |= SOFT_RESET_SEM;
3677
3678         if (reset_mask & RADEON_RESET_IH)
3679                 srbm_soft_reset |= SOFT_RESET_IH;
3680
3681         if (reset_mask & RADEON_RESET_GRBM)
3682                 srbm_soft_reset |= SOFT_RESET_GRBM;
3683
3684         if (reset_mask & RADEON_RESET_VMC)
3685                 srbm_soft_reset |= SOFT_RESET_VMC;
3686
3687         if (reset_mask & RADEON_RESET_MC)
3688                 srbm_soft_reset |= SOFT_RESET_MC;
3689
3690         if (grbm_soft_reset) {
3691                 tmp = RREG32(GRBM_SOFT_RESET);
3692                 tmp |= grbm_soft_reset;
3693                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3694                 WREG32(GRBM_SOFT_RESET, tmp);
3695                 tmp = RREG32(GRBM_SOFT_RESET);
3696
3697                 udelay(50);
3698
3699                 tmp &= ~grbm_soft_reset;
3700                 WREG32(GRBM_SOFT_RESET, tmp);
3701                 tmp = RREG32(GRBM_SOFT_RESET);
3702         }
3703
3704         if (srbm_soft_reset) {
3705                 tmp = RREG32(SRBM_SOFT_RESET);
3706                 tmp |= srbm_soft_reset;
3707                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3708                 WREG32(SRBM_SOFT_RESET, tmp);
3709                 tmp = RREG32(SRBM_SOFT_RESET);
3710
3711                 udelay(50);
3712
3713                 tmp &= ~srbm_soft_reset;
3714                 WREG32(SRBM_SOFT_RESET, tmp);
3715                 tmp = RREG32(SRBM_SOFT_RESET);
3716         }
3717
3718         /* Wait a little for things to settle down */
3719         udelay(50);
3720
3721         evergreen_mc_resume(rdev, &save);
3722         udelay(50);
3723
3724         evergreen_print_gpu_status_regs(rdev);
3725 }
3726
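/**
 * si_asic_reset - soft reset the GPU if it appears hung
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are hung, flag the engine as hung in the BIOS
 * scratch register, soft reset those blocks and clear the flag again
 * if the second check comes back clean (SI).
 * Returns 0.
 */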
3727 int si_asic_reset(struct radeon_device *rdev)
3728 {
3729         u32 reset_mask;
3730
3731         reset_mask = si_gpu_check_soft_reset(rdev);
3732
3733         if (reset_mask)
3734                 r600_set_bios_scratch_engine_hung(rdev, true);
3735
3736         si_gpu_soft_reset(rdev, reset_mask);
3737
3738         reset_mask = si_gpu_check_soft_reset(rdev);
3739
3740         if (!reset_mask)
3741                 r600_set_bios_scratch_engine_hung(rdev, false);
3742
3743         return 0;
3744 }
3745
3746 /**
3747  * si_gfx_is_lockup - Check if the GFX engine is locked up
3748  *
3749  * @rdev: radeon_device pointer
3750  * @ring: radeon_ring structure holding ring information
3751  *
3752  * Check if the GFX engine is locked up.
3753  * Returns true if the engine appears to be locked up, false if not.
3754  */
3755 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3756 {
3757         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3758
3759         if (!(reset_mask & (RADEON_RESET_GFX |
3760                             RADEON_RESET_COMPUTE |
3761                             RADEON_RESET_CP))) {
3762                 radeon_ring_lockup_update(ring);
3763                 return false;
3764         }
3765         /* force CP activities */
3766         radeon_ring_force_activity(rdev, ring);
3767         return radeon_ring_test_lockup(rdev, ring);
3768 }
3769
3770 /* MC */
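/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the HDP registers, stop the MC, program the system/VRAM
 * apertures and FB location (AGP is disabled), then restart the MC
 * and turn off the VGA renderer so it cannot overwrite VRAM (SI).
 */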
3771 static void si_mc_program(struct radeon_device *rdev)
3772 {
3773         struct evergreen_mc_save save;
3774         u32 tmp;
3775         int i, j;
3776
3777         /* Initialize HDP */
3778         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3779                 WREG32((0x2c14 + j), 0x00000000);
3780                 WREG32((0x2c18 + j), 0x00000000);
3781                 WREG32((0x2c1c + j), 0x00000000);
3782                 WREG32((0x2c20 + j), 0x00000000);
3783                 WREG32((0x2c24 + j), 0x00000000);
3784         }
3785         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3786
3787         evergreen_mc_stop(rdev, &save);
3788         if (radeon_mc_wait_for_idle(rdev)) {
3789                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3790         }
3791         if (!ASIC_IS_NODCE(rdev))
3792                 /* Lock out access through the VGA aperture */
3793                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3794         /* Update configuration */
3795         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3796                rdev->mc.vram_start >> 12);
3797         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3798                rdev->mc.vram_end >> 12);
3799         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3800                rdev->vram_scratch.gpu_addr >> 12);
3801         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3802         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3803         WREG32(MC_VM_FB_LOCATION, tmp);
3804         /* XXX double check these! */
3805         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3806         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3807         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3808         WREG32(MC_VM_AGP_BASE, 0);
3809         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3810         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3811         if (radeon_mc_wait_for_idle(rdev)) {
3812                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3813         }
3814         evergreen_mc_resume(rdev, &save);
3815         if (!ASIC_IS_NODCE(rdev)) {
3816                 /* we need to own VRAM, so turn off the VGA renderer here
3817                  * to stop it from overwriting our objects */
3818                 rv515_vga_render_disable(rdev);
3819         }
3820 }
3821
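/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding the MC layout
 *
 * Cap the reported VRAM size so that at least 1024M of GTT still fits
 * in the address space, then let the common helpers pick the VRAM and
 * GTT base addresses (SI).
 */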
3822 void si_vram_gtt_location(struct radeon_device *rdev,
3823                           struct radeon_mc *mc)
3824 {
3825         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3826                 /* leave room for at least 1024M GTT */
3827                 dev_warn(rdev->dev, "limiting VRAM\n");
3828                 mc->real_vram_size = 0xFFC0000000ULL;
3829                 mc->mc_vram_size = 0xFFC0000000ULL;
3830         }
3831         radeon_vram_location(rdev, &rdev->mc, 0);
3832         rdev->mc.gtt_base_align = 0;
3833         radeon_gtt_location(rdev, mc);
3834 }
3835
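/**
 * si_mc_init - gather VRAM information
 *
 * @rdev: radeon_device pointer
 *
 * Work out the memory channel width and count, record the PCI
 * aperture, read the VRAM size (in MB) from CONFIG_MEMSIZE and
 * place VRAM/GTT in the GPU address space (SI).
 * Returns 0.
 */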
3836 static int si_mc_init(struct radeon_device *rdev)
3837 {
3838         u32 tmp;
3839         int chansize, numchan;
3840
3841         /* Get VRAM information */
3842         rdev->mc.vram_is_ddr = true;
3843         tmp = RREG32(MC_ARB_RAMCFG);
3844         if (tmp & CHANSIZE_OVERRIDE) {
3845                 chansize = 16;
3846         } else if (tmp & CHANSIZE_MASK) {
3847                 chansize = 64;
3848         } else {
3849                 chansize = 32;
3850         }
3851         tmp = RREG32(MC_SHARED_CHMAP);
3852         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3853         case 0:
3854         default:
3855                 numchan = 1;
3856                 break;
3857         case 1:
3858                 numchan = 2;
3859                 break;
3860         case 2:
3861                 numchan = 4;
3862                 break;
3863         case 3:
3864                 numchan = 8;
3865                 break;
3866         case 4:
3867                 numchan = 3;
3868                 break;
3869         case 5:
3870                 numchan = 6;
3871                 break;
3872         case 6:
3873                 numchan = 10;
3874                 break;
3875         case 7:
3876                 numchan = 12;
3877                 break;
3878         case 8:
3879                 numchan = 16;
3880                 break;
3881         }
3882         rdev->mc.vram_width = numchan * chansize;
3883         /* Could the aperture size report 0? */
3884         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3885         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3886         /* size in MB on si */
3887         tmp = RREG32(CONFIG_MEMSIZE);
3888         /* some boards may have garbage in the upper 16 bits */
3889         if (tmp & 0xffff0000) {
3890                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
3891                 if (tmp & 0xffff)
3892                         tmp &= 0xffff;
3893         }
3894         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
3895         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
3896         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3897         si_vram_gtt_location(rdev, &rdev->mc);
3898         radeon_update_bandwidth_info(rdev);
3899
3900         return 0;
3901 }
3902
3903 /*
3904  * GART
3905  */
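/**
 * si_pcie_gart_tlb_flush - flush the VM page table TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and request an invalidate of VM context 0
 * via VM_INVALIDATE_REQUEST (SI).
 */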
3906 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3907 {
3908         /* flush hdp cache */
3909         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3910
3911         /* bits 0-15 are the VM contexts 0-15 */
3912         WREG32(VM_INVALIDATE_REQUEST, 1);
3913 }
3914
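/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART table in VRAM, program the L1 TLB and VM L2 cache,
 * point context 0 at the GTT range and contexts 1-15 at the same
 * table for now, enable protection faults and flush the TLB (SI).
 * Returns 0 on success, negative error code on failure.
 */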
3915 static int si_pcie_gart_enable(struct radeon_device *rdev)
3916 {
3917         int r, i;
3918
3919         if (rdev->gart.robj == NULL) {
3920                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3921                 return -EINVAL;
3922         }
3923         r = radeon_gart_table_vram_pin(rdev);
3924         if (r)
3925                 return r;
3926         radeon_gart_restore(rdev);
3927         /* Setup TLB control */
3928         WREG32(MC_VM_MX_L1_TLB_CNTL,
3929                (0xA << 7) |
3930                ENABLE_L1_TLB |
3931                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3932                ENABLE_ADVANCED_DRIVER_MODEL |
3933                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3934         /* Setup L2 cache */
3935         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3936                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3937                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3938                EFFECTIVE_L2_QUEUE_SIZE(7) |
3939                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3940         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3941         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3942                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3943         /* setup context0 */
3944         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3945         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3946         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3947         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3948                         (u32)(rdev->dummy_page.addr >> 12));
3949         WREG32(VM_CONTEXT0_CNTL2, 0);
3950         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3951                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3952
3953         WREG32(0x15D4, 0);
3954         WREG32(0x15D8, 0);
3955         WREG32(0x15DC, 0);
3956
3957         /* empty context1-15 */
3958         /* set vm size, must be a multiple of 4 */
3959         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3960         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3961         /* Assign the pt base to something valid for now; the pts used for
3962          * the VMs are determined by the application and are set up and assigned
3963          * on the fly in the vm part of radeon_gart.c
3964          */
3965         for (i = 1; i < 16; i++) {
3966                 if (i < 8)
3967                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3968                                rdev->gart.table_addr >> 12);
3969                 else
3970                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3971                                rdev->gart.table_addr >> 12);
3972         }
3973
3974         /* enable context1-15 */
3975         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3976                (u32)(rdev->dummy_page.addr >> 12));
3977         WREG32(VM_CONTEXT1_CNTL2, 4);
3978         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3979                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3980                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3981                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3982                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3983                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3984                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3985                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3986                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3987                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3988                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3989                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3990                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3991
3992         si_pcie_gart_tlb_flush(rdev);
3993         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3994                  (unsigned)(rdev->mc.gtt_size >> 20),
3995                  (unsigned long long)rdev->gart.table_addr);
3996         rdev->gart.ready = true;
3997         return 0;
3998 }
3999
4000 static void si_pcie_gart_disable(struct radeon_device *rdev)
4001 {
4002         /* Disable all tables */
4003         WREG32(VM_CONTEXT0_CNTL, 0);
4004         WREG32(VM_CONTEXT1_CNTL, 0);
4005         /* Setup TLB control */
4006         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4007                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4008         /* Setup L2 cache */
4009         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4010                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4011                EFFECTIVE_L2_QUEUE_SIZE(7) |
4012                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4013         WREG32(VM_L2_CNTL2, 0);
4014         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4015                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4016         radeon_gart_table_vram_unpin(rdev);
4017 }
4018
4019 static void si_pcie_gart_fini(struct radeon_device *rdev)
4020 {
4021         si_pcie_gart_disable(rdev);
4022         radeon_gart_table_vram_free(rdev);
4023         radeon_gart_fini(rdev);
4024 }
4025
4026 /* vm parser */
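/**
 * si_vm_reg_valid - check whether a register may be written from a VM IB
 *
 * @reg: register offset
 *
 * Context registers (0x28000 and up) are always allowed; config
 * registers are only accepted if they are on the whitelist below (SI).
 * Returns true if the register is allowed, false otherwise.
 */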
4027 static bool si_vm_reg_valid(u32 reg)
4028 {
4029         /* context regs are fine */
4030         if (reg >= 0x28000)
4031                 return true;
4032
4033         /* check config regs */
4034         switch (reg) {
4035         case GRBM_GFX_INDEX:
4036         case CP_STRMOUT_CNTL:
4037         case VGT_VTX_VECT_EJECT_REG:
4038         case VGT_CACHE_INVALIDATION:
4039         case VGT_ESGS_RING_SIZE:
4040         case VGT_GSVS_RING_SIZE:
4041         case VGT_GS_VERTEX_REUSE:
4042         case VGT_PRIMITIVE_TYPE:
4043         case VGT_INDEX_TYPE:
4044         case VGT_NUM_INDICES:
4045         case VGT_NUM_INSTANCES:
4046         case VGT_TF_RING_SIZE:
4047         case VGT_HS_OFFCHIP_PARAM:
4048         case VGT_TF_MEMORY_BASE:
4049         case PA_CL_ENHANCE:
4050         case PA_SU_LINE_STIPPLE_VALUE:
4051         case PA_SC_LINE_STIPPLE_STATE:
4052         case PA_SC_ENHANCE:
4053         case SQC_CACHES:
4054         case SPI_STATIC_THREAD_MGMT_1:
4055         case SPI_STATIC_THREAD_MGMT_2:
4056         case SPI_STATIC_THREAD_MGMT_3:
4057         case SPI_PS_MAX_WAVE_ID:
4058         case SPI_CONFIG_CNTL:
4059         case SPI_CONFIG_CNTL_1:
4060         case TA_CNTL_AUX:
4061                 return true;
4062         default:
4063                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4064                 return false;
4065         }
4066 }
4067
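/**
 * si_vm_packet3_ce_check - validate a packet3 on the CE (const) IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Only the constant-engine opcodes are accepted (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */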
4068 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4069                                   u32 *ib, struct radeon_cs_packet *pkt)
4070 {
4071         switch (pkt->opcode) {
4072         case PACKET3_NOP:
4073         case PACKET3_SET_BASE:
4074         case PACKET3_SET_CE_DE_COUNTERS:
4075         case PACKET3_LOAD_CONST_RAM:
4076         case PACKET3_WRITE_CONST_RAM:
4077         case PACKET3_WRITE_CONST_RAM_OFFSET:
4078         case PACKET3_DUMP_CONST_RAM:
4079         case PACKET3_INCREMENT_CE_COUNTER:
4080         case PACKET3_WAIT_ON_DE_COUNTER:
4081         case PACKET3_CE_WRITE:
4082                 break;
4083         default:
4084                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4085                 return -EINVAL;
4086         }
4087         return 0;
4088 }
4089
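/**
 * si_vm_packet3_cp_dma_check - validate a CP_DMA packet3
 *
 * @ib: IB dword buffer
 * @idx: index of the first dword after the packet header
 *
 * When the source or destination address space is a register, verify
 * that every register touched by the transfer passes si_vm_reg_valid()
 * (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */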
4090 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4091 {
4092         u32 start_reg, reg, i;
4093         u32 command = ib[idx + 4];
4094         u32 info = ib[idx + 1];
4095         u32 idx_value = ib[idx];
4096         if (command & PACKET3_CP_DMA_CMD_SAS) {
4097                 /* src address space is register */
4098                 if (((info & 0x60000000) >> 29) == 0) {
4099                         start_reg = idx_value << 2;
4100                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4101                                 reg = start_reg;
4102                                 if (!si_vm_reg_valid(reg)) {
4103                                         DRM_ERROR("CP DMA Bad SRC register\n");
4104                                         return -EINVAL;
4105                                 }
4106                         } else {
4107                                 for (i = 0; i < (command & 0x1fffff); i++) {
4108                                         reg = start_reg + (4 * i);
4109                                         if (!si_vm_reg_valid(reg)) {
4110                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4111                                                 return -EINVAL;
4112                                         }
4113                                 }
4114                         }
4115                 }
4116         }
4117         if (command & PACKET3_CP_DMA_CMD_DAS) {
4118                 /* dst address space is register */
4119                 if (((info & 0x00300000) >> 20) == 0) {
4120                         start_reg = ib[idx + 2];
4121                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4122                                 reg = start_reg;
4123                                 if (!si_vm_reg_valid(reg)) {
4124                                         DRM_ERROR("CP DMA Bad DST register\n");
4125                                         return -EINVAL;
4126                                 }
4127                         } else {
4128                                 for (i = 0; i < (command & 0x1fffff); i++) {
4129                                         reg = start_reg + (4 * i);
4130                                         if (!si_vm_reg_valid(reg)) {
4131                                                 DRM_ERROR("CP DMA Bad DST register\n");
4132                                                 return -EINVAL;
4133                                         }
4134                                 }
4135                         }
4136                 }
4137         }
4138         return 0;
4139 }
4140
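/**
 * si_vm_packet3_gfx_check - validate a packet3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Most draw/state opcodes pass straight through; packets that can write
 * registers (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW, SET_CONFIG_REG,
 * CP_DMA) have their destinations checked against si_vm_reg_valid() (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */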
4141 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4142                                    u32 *ib, struct radeon_cs_packet *pkt)
4143 {
4144         int r;
4145         u32 idx = pkt->idx + 1;
4146         u32 idx_value = ib[idx];
4147         u32 start_reg, end_reg, reg, i;
4148
4149         switch (pkt->opcode) {
4150         case PACKET3_NOP:
4151         case PACKET3_SET_BASE:
4152         case PACKET3_CLEAR_STATE:
4153         case PACKET3_INDEX_BUFFER_SIZE:
4154         case PACKET3_DISPATCH_DIRECT:
4155         case PACKET3_DISPATCH_INDIRECT:
4156         case PACKET3_ALLOC_GDS:
4157         case PACKET3_WRITE_GDS_RAM:
4158         case PACKET3_ATOMIC_GDS:
4159         case PACKET3_ATOMIC:
4160         case PACKET3_OCCLUSION_QUERY:
4161         case PACKET3_SET_PREDICATION:
4162         case PACKET3_COND_EXEC:
4163         case PACKET3_PRED_EXEC:
4164         case PACKET3_DRAW_INDIRECT:
4165         case PACKET3_DRAW_INDEX_INDIRECT:
4166         case PACKET3_INDEX_BASE:
4167         case PACKET3_DRAW_INDEX_2:
4168         case PACKET3_CONTEXT_CONTROL:
4169         case PACKET3_INDEX_TYPE:
4170         case PACKET3_DRAW_INDIRECT_MULTI:
4171         case PACKET3_DRAW_INDEX_AUTO:
4172         case PACKET3_DRAW_INDEX_IMMD:
4173         case PACKET3_NUM_INSTANCES:
4174         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4175         case PACKET3_STRMOUT_BUFFER_UPDATE:
4176         case PACKET3_DRAW_INDEX_OFFSET_2:
4177         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4178         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4179         case PACKET3_MPEG_INDEX:
4180         case PACKET3_WAIT_REG_MEM:
4181         case PACKET3_MEM_WRITE:
4182         case PACKET3_PFP_SYNC_ME:
4183         case PACKET3_SURFACE_SYNC:
4184         case PACKET3_EVENT_WRITE:
4185         case PACKET3_EVENT_WRITE_EOP:
4186         case PACKET3_EVENT_WRITE_EOS:
4187         case PACKET3_SET_CONTEXT_REG:
4188         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4189         case PACKET3_SET_SH_REG:
4190         case PACKET3_SET_SH_REG_OFFSET:
4191         case PACKET3_INCREMENT_DE_COUNTER:
4192         case PACKET3_WAIT_ON_CE_COUNTER:
4193         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4194         case PACKET3_ME_WRITE:
4195                 break;
4196         case PACKET3_COPY_DATA:
4197                 if ((idx_value & 0xf00) == 0) {
4198                         reg = ib[idx + 3] * 4;
4199                         if (!si_vm_reg_valid(reg))
4200                                 return -EINVAL;
4201                 }
4202                 break;
4203         case PACKET3_WRITE_DATA:
4204                 if ((idx_value & 0xf00) == 0) {
4205                         start_reg = ib[idx + 1] * 4;
4206                         if (idx_value & 0x10000) {
4207                                 if (!si_vm_reg_valid(start_reg))
4208                                         return -EINVAL;
4209                         } else {
4210                                 for (i = 0; i < (pkt->count - 2); i++) {
4211                                         reg = start_reg + (4 * i);
4212                                         if (!si_vm_reg_valid(reg))
4213                                                 return -EINVAL;
4214                                 }
4215                         }
4216                 }
4217                 break;
4218         case PACKET3_COND_WRITE:
4219                 if (idx_value & 0x100) {
4220                         reg = ib[idx + 5] * 4;
4221                         if (!si_vm_reg_valid(reg))
4222                                 return -EINVAL;
4223                 }
4224                 break;
4225         case PACKET3_COPY_DW:
4226                 if (idx_value & 0x2) {
4227                         reg = ib[idx + 3] * 4;
4228                         if (!si_vm_reg_valid(reg))
4229                                 return -EINVAL;
4230                 }
4231                 break;
4232         case PACKET3_SET_CONFIG_REG:
4233                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4234                 end_reg = 4 * pkt->count + start_reg - 4;
4235                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4236                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4237                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4238                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4239                         return -EINVAL;
4240                 }
4241                 for (i = 0; i < pkt->count; i++) {
4242                         reg = start_reg + (4 * i);
4243                         if (!si_vm_reg_valid(reg))
4244                                 return -EINVAL;
4245                 }
4246                 break;
4247         case PACKET3_CP_DMA:
4248                 r = si_vm_packet3_cp_dma_check(ib, idx);
4249                 if (r)
4250                         return r;
4251                 break;
4252         default:
4253                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4254                 return -EINVAL;
4255         }
4256         return 0;
4257 }
4258
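/**
 * si_vm_packet3_compute_check - validate a packet3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header
 *
 * Same checks as the GFX case, but against the compute opcode set (SI).
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */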
4259 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4260                                        u32 *ib, struct radeon_cs_packet *pkt)
4261 {
4262         int r;
4263         u32 idx = pkt->idx + 1;
4264         u32 idx_value = ib[idx];
4265         u32 start_reg, reg, i;
4266
4267         switch (pkt->opcode) {
4268         case PACKET3_NOP:
4269         case PACKET3_SET_BASE:
4270         case PACKET3_CLEAR_STATE:
4271         case PACKET3_DISPATCH_DIRECT:
4272         case PACKET3_DISPATCH_INDIRECT:
4273         case PACKET3_ALLOC_GDS:
4274         case PACKET3_WRITE_GDS_RAM:
4275         case PACKET3_ATOMIC_GDS:
4276         case PACKET3_ATOMIC:
4277         case PACKET3_OCCLUSION_QUERY:
4278         case PACKET3_SET_PREDICATION:
4279         case PACKET3_COND_EXEC:
4280         case PACKET3_PRED_EXEC:
4281         case PACKET3_CONTEXT_CONTROL:
4282         case PACKET3_STRMOUT_BUFFER_UPDATE:
4283         case PACKET3_WAIT_REG_MEM:
4284         case PACKET3_MEM_WRITE:
4285         case PACKET3_PFP_SYNC_ME:
4286         case PACKET3_SURFACE_SYNC:
4287         case PACKET3_EVENT_WRITE:
4288         case PACKET3_EVENT_WRITE_EOP:
4289         case PACKET3_EVENT_WRITE_EOS:
4290         case PACKET3_SET_CONTEXT_REG:
4291         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4292         case PACKET3_SET_SH_REG:
4293         case PACKET3_SET_SH_REG_OFFSET:
4294         case PACKET3_INCREMENT_DE_COUNTER:
4295         case PACKET3_WAIT_ON_CE_COUNTER:
4296         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4297         case PACKET3_ME_WRITE:
4298                 break;
4299         case PACKET3_COPY_DATA:
4300                 if ((idx_value & 0xf00) == 0) {
4301                         reg = ib[idx + 3] * 4;
4302                         if (!si_vm_reg_valid(reg))
4303                                 return -EINVAL;
4304                 }
4305                 break;
4306         case PACKET3_WRITE_DATA:
4307                 if ((idx_value & 0xf00) == 0) {
4308                         start_reg = ib[idx + 1] * 4;
4309                         if (idx_value & 0x10000) {
4310                                 if (!si_vm_reg_valid(start_reg))
4311                                         return -EINVAL;
4312                         } else {
4313                                 for (i = 0; i < (pkt->count - 2); i++) {
4314                                         reg = start_reg + (4 * i);
4315                                         if (!si_vm_reg_valid(reg))
4316                                                 return -EINVAL;
4317                                 }
4318                         }
4319                 }
4320                 break;
4321         case PACKET3_COND_WRITE:
4322                 if (idx_value & 0x100) {
4323                         reg = ib[idx + 5] * 4;
4324                         if (!si_vm_reg_valid(reg))
4325                                 return -EINVAL;
4326                 }
4327                 break;
4328         case PACKET3_COPY_DW:
4329                 if (idx_value & 0x2) {
4330                         reg = ib[idx + 3] * 4;
4331                         if (!si_vm_reg_valid(reg))
4332                                 return -EINVAL;
4333                 }
4334                 break;
4335         case PACKET3_CP_DMA:
4336                 r = si_vm_packet3_cp_dma_check(ib, idx);
4337                 if (r)
4338                         return r;
4339                 break;
4340         default:
4341                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4342                 return -EINVAL;
4343         }
4344         return 0;
4345 }
4346
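/**
 * si_ib_parse - validate an indirect buffer before VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib structure holding the IB
 *
 * Walk the IB packet by packet: type 0 packets are rejected, type 2
 * packets are skipped, and type 3 packets are dispatched to the CE,
 * GFX or compute checker depending on the target ring (SI).
 * Returns 0 if the IB is valid, negative error code otherwise.
 */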
4347 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4348 {
4349         int ret = 0;
4350         u32 idx = 0;
4351         struct radeon_cs_packet pkt;
4352
4353         do {
4354                 pkt.idx = idx;
4355                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4356                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4357                 pkt.one_reg_wr = 0;
4358                 switch (pkt.type) {
4359                 case RADEON_PACKET_TYPE0:
4360                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4361                         ret = -EINVAL;
4362                         break;
4363                 case RADEON_PACKET_TYPE2:
4364                         idx += 1;
4365                         break;
4366                 case RADEON_PACKET_TYPE3:
4367                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4368                         if (ib->is_const_ib)
4369                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4370                         else {
4371                                 switch (ib->ring) {
4372                                 case RADEON_RING_TYPE_GFX_INDEX:
4373                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4374                                         break;
4375                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4376                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4377                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4378                                         break;
4379                                 default:
4380                                         dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4381                                         ret = -EINVAL;
4382                                         break;
4383                                 }
4384                         }
4385                         idx += pkt.count + 2;
4386                         break;
4387                 default:
4388                 dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4389                         ret = -EINVAL;
4390                         break;
4391                 }
4392                 if (ret)
4393                         break;
4394         } while (idx < ib->length_dw);
4395
4396         return ret;
4397 }
4398
4399 /*
4400  * vm
4401  */
4402 int si_vm_init(struct radeon_device *rdev)
4403 {
4404         /* number of VMs */
4405         rdev->vm_manager.nvm = 16;
4406         /* base offset of vram pages */
4407         rdev->vm_manager.vram_base_offset = 0;
4408
4409         return 0;
4410 }
4411
4412 void si_vm_fini(struct radeon_device *rdev)
4413 {
4414 }
4415
4416 /**
4417  * si_vm_decode_fault - print human readable fault info
4418  *
4419  * @rdev: radeon_device pointer
4420  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4421  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4422  *
4423  * Print human readable fault information (SI).
4424  */
4425 static void si_vm_decode_fault(struct radeon_device *rdev,
4426                                u32 status, u32 addr)
4427 {
4428         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4429         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4430         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4431         char *block;
4432
4433         if (rdev->family == CHIP_TAHITI) {
4434                 switch (mc_id) {
4435                 case 160:
4436                 case 144:
4437                 case 96:
4438                 case 80:
4439                 case 224:
4440                 case 208:
4441                 case 32:
4442                 case 16:
4443                         block = "CB";
4444                         break;
4445                 case 161:
4446                 case 145:
4447                 case 97:
4448                 case 81:
4449                 case 225:
4450                 case 209:
4451                 case 33:
4452                 case 17:
4453                         block = "CB_FMASK";
4454                         break;
4455                 case 162:
4456                 case 146:
4457                 case 98:
4458                 case 82:
4459                 case 226:
4460                 case 210:
4461                 case 34:
4462                 case 18:
4463                         block = "CB_CMASK";
4464                         break;
4465                 case 163:
4466                 case 147:
4467                 case 99:
4468                 case 83:
4469                 case 227:
4470                 case 211:
4471                 case 35:
4472                 case 19:
4473                         block = "CB_IMMED";
4474                         break;
4475                 case 164:
4476                 case 148:
4477                 case 100:
4478                 case 84:
4479                 case 228:
4480                 case 212:
4481                 case 36:
4482                 case 20:
4483                         block = "DB";
4484                         break;
4485                 case 165:
4486                 case 149:
4487                 case 101:
4488                 case 85:
4489                 case 229:
4490                 case 213:
4491                 case 37:
4492                 case 21:
4493                         block = "DB_HTILE";
4494                         break;
4495                 case 167:
4496                 case 151:
4497                 case 103:
4498                 case 87:
4499                 case 231:
4500                 case 215:
4501                 case 39:
4502                 case 23:
4503                         block = "DB_STEN";
4504                         break;
4505                 case 72:
4506                 case 68:
4507                 case 64:
4508                 case 8:
4509                 case 4:
4510                 case 0:
4511                 case 136:
4512                 case 132:
4513                 case 128:
4514                 case 200:
4515                 case 196:
4516                 case 192:
4517                         block = "TC";
4518                         break;
4519                 case 112:
4520                 case 48:
4521                         block = "CP";
4522                         break;
4523                 case 49:
4524                 case 177:
4525                 case 50:
4526                 case 178:
4527                         block = "SH";
4528                         break;
4529                 case 53:
4530                 case 190:
4531                         block = "VGT";
4532                         break;
4533                 case 117:
4534                         block = "IH";
4535                         break;
4536                 case 51:
4537                 case 115:
4538                         block = "RLC";
4539                         break;
4540                 case 119:
4541                 case 183:
4542                         block = "DMA0";
4543                         break;
4544                 case 61:
4545                         block = "DMA1";
4546                         break;
4547                 case 248:
4548                 case 120:
4549                         block = "HDP";
4550                         break;
4551                 default:
4552                         block = "unknown";
4553                         break;
4554                 }
4555         } else {
4556                 switch (mc_id) {
4557                 case 32:
4558                 case 16:
4559                 case 96:
4560                 case 80:
4561                 case 160:
4562                 case 144:
4563                 case 224:
4564                 case 208:
4565                         block = "CB";
4566                         break;
4567                 case 33:
4568                 case 17:
4569                 case 97:
4570                 case 81:
4571                 case 161:
4572                 case 145:
4573                 case 225:
4574                 case 209:
4575                         block = "CB_FMASK";
4576                         break;
4577                 case 34:
4578                 case 18:
4579                 case 98:
4580                 case 82:
4581                 case 162:
4582                 case 146:
4583                 case 226:
4584                 case 210:
4585                         block = "CB_CMASK";
4586                         break;
4587                 case 35:
4588                 case 19:
4589                 case 99:
4590                 case 83:
4591                 case 163:
4592                 case 147:
4593                 case 227:
4594                 case 211:
4595                         block = "CB_IMMED";
4596                         break;
4597                 case 36:
4598                 case 20:
4599                 case 100:
4600                 case 84:
4601                 case 164:
4602                 case 148:
4603                 case 228:
4604                 case 212:
4605                         block = "DB";
4606                         break;
4607                 case 37:
4608                 case 21:
4609                 case 101:
4610                 case 85:
4611                 case 165:
4612                 case 149:
4613                 case 229:
4614                 case 213:
4615                         block = "DB_HTILE";
4616                         break;
4617                 case 39:
4618                 case 23:
4619                 case 103:
4620                 case 87:
4621                 case 167:
4622                 case 151:
4623                 case 231:
4624                 case 215:
4625                         block = "DB_STEN";
4626                         break;
4627                 case 72:
4628                 case 68:
4629                 case 8:
4630                 case 4:
4631                 case 136:
4632                 case 132:
4633                 case 200:
4634                 case 196:
4635                         block = "TC";
4636                         break;
4637                 case 112:
4638                 case 48:
4639                         block = "CP";
4640                         break;
4641                 case 49:
4642                 case 177:
4643                 case 50:
4644                 case 178:
4645                         block = "SH";
4646                         break;
4647                 case 53:
4648                         block = "VGT";
4649                         break;
4650                 case 117:
4651                         block = "IH";
4652                         break;
4653                 case 51:
4654                 case 115:
4655                         block = "RLC";
4656                         break;
4657                 case 119:
4658                 case 183:
4659                         block = "DMA0";
4660                         break;
4661                 case 61:
4662                         block = "DMA1";
4663                         break;
4664                 case 248:
4665                 case 120:
4666                         block = "HDP";
4667                         break;
4668                 default:
4669                         block = "unknown";
4670                         break;
4671                 }
4672         }
4673
4674         printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4675                protections, vmid, addr,
4676                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4677                block, mc_id);
4678 }
4679
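/**
 * si_vm_flush - flush the TLB for a VM via the GFX/compute ring
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm: VM to flush
 *
 * Emit WRITE_DATA packets that update the VM's page directory base,
 * flush the HDP cache and invalidate the matching VM context, then
 * sync the PFP to the ME so stale prefetches are not used (SI).
 */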
4680 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4681 {
4682         struct radeon_ring *ring = &rdev->ring[ridx];
4683
4684         if (vm == NULL)
4685                 return;
4686
4687         /* write new base address */
4688         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4689         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4690                                  WRITE_DATA_DST_SEL(0)));
4691
4692         if (vm->id < 8) {
4693                 radeon_ring_write(ring,
4694                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4695         } else {
4696                 radeon_ring_write(ring,
4697                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4698         }
4699         radeon_ring_write(ring, 0);
4700         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4701
4702         /* flush hdp cache */
4703         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4704         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4705                                  WRITE_DATA_DST_SEL(0)));
4706         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4707         radeon_ring_write(ring, 0);
4708         radeon_ring_write(ring, 0x1);
4709
4710         /* bits 0-15 are the VM contexts 0-15 */
4711         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4712         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4713                                  WRITE_DATA_DST_SEL(0)));
4714         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4715         radeon_ring_write(ring, 0);
4716         radeon_ring_write(ring, 1 << vm->id);
4717
4718         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4719         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4720         radeon_ring_write(ring, 0x0);
4721 }
4722
4723 /*
4724  *  Power and clock gating
4725  */
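/**
 * si_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll both RLC_SERDES_MASTER_BUSY registers until they read zero
 * or the usec timeout expires (SI).
 */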
4726 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4727 {
4728         int i;
4729
4730         for (i = 0; i < rdev->usec_timeout; i++) {
4731                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4732                         break;
4733                 udelay(1);
4734         }
4735
4736         for (i = 0; i < rdev->usec_timeout; i++) {
4737                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4738                         break;
4739                 udelay(1);
4740         }
4741 }
4742
4743 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4744                                          bool enable)
4745 {
4746         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4747         u32 mask;
4748         int i;
4749
4750         if (enable)
4751                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4752         else
4753                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4754         WREG32(CP_INT_CNTL_RING0, tmp);
4755
4756         if (!enable) {
4757                 /* read a gfx register */
4758                 tmp = RREG32(DB_DEPTH_INFO);
4759
4760                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4761                 for (i = 0; i < rdev->usec_timeout; i++) {
4762                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4763                                 break;
4764                         udelay(1);
4765                 }
4766         }
4767 }
4768
4769 static void si_set_uvd_dcm(struct radeon_device *rdev,
4770                            bool sw_mode)
4771 {
4772         u32 tmp, tmp2;
4773
4774         tmp = RREG32(UVD_CGC_CTRL);
4775         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4776         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4777
4778         if (sw_mode) {
4779                 tmp &= ~0x7ffff800;
4780                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4781         } else {
4782                 tmp |= 0x7ffff800;
4783                 tmp2 = 0;
4784         }
4785
4786         WREG32(UVD_CGC_CTRL, tmp);
4787         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4788 }
4789
4790 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4791 {
4792         bool hw_mode = true;
4793
4794         if (hw_mode) {
4795                 si_set_uvd_dcm(rdev, false);
4796         } else {
4797                 u32 tmp = RREG32(UVD_CGC_CTRL);
4798                 tmp &= ~DCM;
4799                 WREG32(UVD_CGC_CTRL, tmp);
4800         }
4801 }
4802
4803 static u32 si_halt_rlc(struct radeon_device *rdev)
4804 {
4805         u32 data, orig;
4806
4807         orig = data = RREG32(RLC_CNTL);
4808
4809         if (data & RLC_ENABLE) {
4810                 data &= ~RLC_ENABLE;
4811                 WREG32(RLC_CNTL, data);
4812
4813                 si_wait_for_rlc_serdes(rdev);
4814         }
4815
4816         return orig;
4817 }
4818
4819 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4820 {
4821         u32 tmp;
4822
4823         tmp = RREG32(RLC_CNTL);
4824         if (tmp != rlc)
4825                 WREG32(RLC_CNTL, rlc);
4826 }
4827
4828 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4829 {
4830         u32 data, orig;
4831
4832         orig = data = RREG32(DMA_PG);
4833         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4834                 data |= PG_CNTL_ENABLE;
4835         else
4836                 data &= ~PG_CNTL_ENABLE;
4837         if (orig != data)
4838                 WREG32(DMA_PG, data);
4839 }
4840
4841 static void si_init_dma_pg(struct radeon_device *rdev)
4842 {
4843         u32 tmp;
4844
4845         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4846         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4847
4848         for (tmp = 0; tmp < 5; tmp++)
4849                 WREG32(DMA_PGFSM_WRITE, 0);
4850 }
4851
4852 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4853                                bool enable)
4854 {
4855         u32 tmp;
4856
4857         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
4858                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4859                 WREG32(RLC_TTOP_D, tmp);
4860
4861                 tmp = RREG32(RLC_PG_CNTL);
4862                 tmp |= GFX_PG_ENABLE;
4863                 WREG32(RLC_PG_CNTL, tmp);
4864
4865                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4866                 tmp |= AUTO_PG_EN;
4867                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4868         } else {
4869                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4870                 tmp &= ~AUTO_PG_EN;
4871                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4872
4873                 tmp = RREG32(DB_RENDER_CONTROL);
4874         }
4875 }
4876
4877 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4878 {
4879         u32 tmp;
4880
4881         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4882
4883         tmp = RREG32(RLC_PG_CNTL);
4884         tmp |= GFX_PG_SRC;
4885         WREG32(RLC_PG_CNTL, tmp);
4886
4887         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4888
4889         tmp = RREG32(RLC_AUTO_PG_CTRL);
4890
4891         tmp &= ~GRBM_REG_SGIT_MASK;
4892         tmp |= GRBM_REG_SGIT(0x700);
4893         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
4894         WREG32(RLC_AUTO_PG_CTRL, tmp);
4895 }
4896
4897 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4898 {
4899         u32 mask = 0, tmp, tmp1;
4900         int i;
4901
4902         si_select_se_sh(rdev, se, sh);
4903         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4904         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4905         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4906
4907         tmp &= 0xffff0000;
4908
4909         tmp |= tmp1;
4910         tmp >>= 16;
4911
4912         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
4913                 mask <<= 1;
4914                 mask |= 1;
4915         }
4916
4917         return (~tmp) & mask;
4918 }
4919
4920 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4921 {
4922         u32 i, j, k, active_cu_number = 0;
4923         u32 mask, counter, cu_bitmap;
4924         u32 tmp = 0;
4925
4926         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4927                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4928                         mask = 1;
4929                         cu_bitmap = 0;
4930                         counter  = 0;
4931                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4932                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4933                                         if (counter < 2)
4934                                                 cu_bitmap |= mask;
4935                                         counter++;
4936                                 }
4937                                 mask <<= 1;
4938                         }
4939
4940                         active_cu_number += counter;
4941                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4942                 }
4943         }
4944
4945         WREG32(RLC_PG_AO_CU_MASK, tmp);
4946
4947         tmp = RREG32(RLC_MAX_PG_CU);
4948         tmp &= ~MAX_PU_CU_MASK;
4949         tmp |= MAX_PU_CU(active_cu_number);
4950         WREG32(RLC_MAX_PG_CU, tmp);
4951 }
4952
4953 static void si_enable_cgcg(struct radeon_device *rdev,
4954                            bool enable)
4955 {
4956         u32 data, orig, tmp;
4957
4958         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
4959
4960         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
4961                 si_enable_gui_idle_interrupt(rdev, true);
4962
4963                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
4964
4965                 tmp = si_halt_rlc(rdev);
4966
4967                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
4968                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
4969                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
4970
4971                 si_wait_for_rlc_serdes(rdev);
4972
4973                 si_update_rlc(rdev, tmp);
4974
4975                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
4976
4977                 data |= CGCG_EN | CGLS_EN;
4978         } else {
4979                 si_enable_gui_idle_interrupt(rdev, false);
4980
4981                 RREG32(CB_CGTT_SCLK_CTRL);
4982                 RREG32(CB_CGTT_SCLK_CTRL);
4983                 RREG32(CB_CGTT_SCLK_CTRL);
4984                 RREG32(CB_CGTT_SCLK_CTRL);
4985
4986                 data &= ~(CGCG_EN | CGLS_EN);
4987         }
4988
4989         if (orig != data)
4990                 WREG32(RLC_CGCG_CGLS_CTRL, data);
4991 }
4992
4993 static void si_enable_mgcg(struct radeon_device *rdev,
4994                            bool enable)
4995 {
4996         u32 data, orig, tmp = 0;
4997
4998         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
4999                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5000                 data = 0x96940200;
5001                 if (orig != data)
5002                         WREG32(CGTS_SM_CTRL_REG, data);
5003
5004                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5005                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5006                         data |= CP_MEM_LS_EN;
5007                         if (orig != data)
5008                                 WREG32(CP_MEM_SLP_CNTL, data);
5009                 }
5010
5011                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5012                 data &= 0xffffffc0;
5013                 if (orig != data)
5014                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5015
5016                 tmp = si_halt_rlc(rdev);
5017
5018                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5019                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5020                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5021
5022                 si_update_rlc(rdev, tmp);
5023         } else {
5024                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5025                 data |= 0x00000003;
5026                 if (orig != data)
5027                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5028
5029                 data = RREG32(CP_MEM_SLP_CNTL);
5030                 if (data & CP_MEM_LS_EN) {
5031                         data &= ~CP_MEM_LS_EN;
5032                         WREG32(CP_MEM_SLP_CNTL, data);
5033                 }
5034                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5035                 data |= LS_OVERRIDE | OVERRIDE;
5036                 if (orig != data)
5037                         WREG32(CGTS_SM_CTRL_REG, data);
5038
5039                 tmp = si_halt_rlc(rdev);
5040
5041                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5042                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5043                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5044
5045                 si_update_rlc(rdev, tmp);
5046         }
5047 }
5048
5049 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5050                                bool enable)
5051 {
5052         u32 orig, data, tmp;
5053
5054         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5055                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5056                 tmp |= 0x3fff;
5057                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5058
5059                 orig = data = RREG32(UVD_CGC_CTRL);
5060                 data |= DCM;
5061                 if (orig != data)
5062                         WREG32(UVD_CGC_CTRL, data);
5063
5064                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5065                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5066         } else {
5067                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5068                 tmp &= ~0x3fff;
5069                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5070
5071                 orig = data = RREG32(UVD_CGC_CTRL);
5072                 data &= ~DCM;
5073                 if (orig != data)
5074                         WREG32(UVD_CGC_CTRL, data);
5075
5076                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5077                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5078         }
5079 }
5080
5081 static const u32 mc_cg_registers[] =
5082 {
5083         MC_HUB_MISC_HUB_CG,
5084         MC_HUB_MISC_SIP_CG,
5085         MC_HUB_MISC_VM_CG,
5086         MC_XPB_CLK_GAT,
5087         ATC_MISC_CG,
5088         MC_CITF_MISC_WR_CG,
5089         MC_CITF_MISC_RD_CG,
5090         MC_CITF_MISC_VM_CG,
5091         VM_L2_CG,
5092 };
5093
5094 static void si_enable_mc_ls(struct radeon_device *rdev,
5095                             bool enable)
5096 {
5097         int i;
5098         u32 orig, data;
5099
5100         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5101                 orig = data = RREG32(mc_cg_registers[i]);
5102                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5103                         data |= MC_LS_ENABLE;
5104                 else
5105                         data &= ~MC_LS_ENABLE;
5106                 if (data != orig)
5107                         WREG32(mc_cg_registers[i], data);
5108         }
5109 }
5110
5111 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5112                                bool enable)
5113 {
5114         int i;
5115         u32 orig, data;
5116
5117         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5118                 orig = data = RREG32(mc_cg_registers[i]);
5119                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5120                         data |= MC_CG_ENABLE;
5121                 else
5122                         data &= ~MC_CG_ENABLE;
5123                 if (data != orig)
5124                         WREG32(mc_cg_registers[i], data);
5125         }
5126 }
5127
5128 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5129                                bool enable)
5130 {
5131         u32 orig, data, offset;
5132         int i;
5133
5134         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5135                 for (i = 0; i < 2; i++) {
5136                         if (i == 0)
5137                                 offset = DMA0_REGISTER_OFFSET;
5138                         else
5139                                 offset = DMA1_REGISTER_OFFSET;
5140                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5141                         data &= ~MEM_POWER_OVERRIDE;
5142                         if (data != orig)
5143                                 WREG32(DMA_POWER_CNTL + offset, data);
5144                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5145                 }
5146         } else {
5147                 for (i = 0; i < 2; i++) {
5148                         if (i == 0)
5149                                 offset = DMA0_REGISTER_OFFSET;
5150                         else
5151                                 offset = DMA1_REGISTER_OFFSET;
5152                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5153                         data |= MEM_POWER_OVERRIDE;
5154                         if (data != orig)
5155                                 WREG32(DMA_POWER_CNTL + offset, data);
5156
5157                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5158                         data = 0xff000000;
5159                         if (data != orig)
5160                                 WREG32(DMA_CLK_CTRL + offset, data);
5161                 }
5162         }
5163 }
5164
5165 static void si_enable_bif_mgls(struct radeon_device *rdev,
5166                                bool enable)
5167 {
5168         u32 orig, data;
5169
5170         orig = data = RREG32_PCIE(PCIE_CNTL2);
5171
5172         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5173                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5174                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5175         else
5176                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5177                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5178
5179         if (orig != data)
5180                 WREG32_PCIE(PCIE_CNTL2, data);
5181 }
5182
5183 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5184                                bool enable)
5185 {
5186         u32 orig, data;
5187
5188         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5189
5190         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5191                 data &= ~CLOCK_GATING_DIS;
5192         else
5193                 data |= CLOCK_GATING_DIS;
5194
5195         if (orig != data)
5196                 WREG32(HDP_HOST_PATH_CNTL, data);
5197 }
5198
5199 static void si_enable_hdp_ls(struct radeon_device *rdev,
5200                              bool enable)
5201 {
5202         u32 orig, data;
5203
5204         orig = data = RREG32(HDP_MEM_POWER_LS);
5205
5206         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5207                 data |= HDP_LS_ENABLE;
5208         else
5209                 data &= ~HDP_LS_ENABLE;
5210
5211         if (orig != data)
5212                 WREG32(HDP_MEM_POWER_LS, data);
5213 }
5214
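/**
 * si_update_cg - update clockgating for the requested blocks
 *
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clockgating
 *
 * Enables or disables clockgating (and the related light sleep features)
 * for the GFX, MC, SDMA, BIF, UVD and HDP blocks selected in @block (SI).
 * For GFX, the GUI idle interrupt is held off while MGCG and CGCG are
 * reprogrammed, since the ordering matters.
 */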
5215 void si_update_cg(struct radeon_device *rdev,
5216                   u32 block, bool enable)
5217 {
5218         if (block & RADEON_CG_BLOCK_GFX) {
5219                 si_enable_gui_idle_interrupt(rdev, false);
5220                 /* order matters! */
5221                 if (enable) {
5222                         si_enable_mgcg(rdev, true);
5223                         si_enable_cgcg(rdev, true);
5224                 } else {
5225                         si_enable_cgcg(rdev, false);
5226                         si_enable_mgcg(rdev, false);
5227                 }
5228                 si_enable_gui_idle_interrupt(rdev, true);
5229         }
5230
5231         if (block & RADEON_CG_BLOCK_MC) {
5232                 si_enable_mc_mgcg(rdev, enable);
5233                 si_enable_mc_ls(rdev, enable);
5234         }
5235
5236         if (block & RADEON_CG_BLOCK_SDMA) {
5237                 si_enable_dma_mgcg(rdev, enable);
5238         }
5239
5240         if (block & RADEON_CG_BLOCK_BIF) {
5241                 si_enable_bif_mgls(rdev, enable);
5242         }
5243
5244         if (block & RADEON_CG_BLOCK_UVD) {
5245                 if (rdev->has_uvd) {
5246                         si_enable_uvd_mgcg(rdev, enable);
5247                 }
5248         }
5249
5250         if (block & RADEON_CG_BLOCK_HDP) {
5251                 si_enable_hdp_mgcg(rdev, enable);
5252                 si_enable_hdp_ls(rdev, enable);
5253         }
5254 }
5255
5256 static void si_init_cg(struct radeon_device *rdev)
5257 {
5258         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5259                             RADEON_CG_BLOCK_MC |
5260                             RADEON_CG_BLOCK_SDMA |
5261                             RADEON_CG_BLOCK_BIF |
5262                             RADEON_CG_BLOCK_HDP), true);
5263         if (rdev->has_uvd) {
5264                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5265                 si_init_uvd_internal_cg(rdev);
5266         }
5267 }
5268
5269 static void si_fini_cg(struct radeon_device *rdev)
5270 {
5271         if (rdev->has_uvd) {
5272                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5273         }
5274         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5275                             RADEON_CG_BLOCK_MC |
5276                             RADEON_CG_BLOCK_SDMA |
5277                             RADEON_CG_BLOCK_BIF |
5278                             RADEON_CG_BLOCK_HDP), false);
5279 }
5280
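/**
 * si_get_csb_size - get the size of the RLC clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walks the RLC clear state section definitions and returns the total
 * size in dwords of the clear state buffer, including the preamble,
 * context control, PA_SC_RASTER_CONFIG and clear state packets (SI).
 * Returns 0 if no clear state data is available.
 */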
5281 u32 si_get_csb_size(struct radeon_device *rdev)
5282 {
5283         u32 count = 0;
5284         const struct cs_section_def *sect = NULL;
5285         const struct cs_extent_def *ext = NULL;
5286
5287         if (rdev->rlc.cs_data == NULL)
5288                 return 0;
5289
5290         /* begin clear state */
5291         count += 2;
5292         /* context control state */
5293         count += 3;
5294
5295         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5296                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5297                         if (sect->id == SECT_CONTEXT)
5298                                 count += 2 + ext->reg_count;
5299                         else
5300                                 return 0;
5301                 }
5302         }
5303         /* pa_sc_raster_config */
5304         count += 3;
5305         /* end clear state */
5306         count += 2;
5307         /* clear state */
5308         count += 2;
5309
5310         return count;
5311 }
5312
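/**
 * si_get_csb_buffer - fill the RLC clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: clear state buffer to fill
 *
 * Fills @buffer with the little endian PM4 packets that make up the
 * clear state buffer: preamble begin/end, context control, the context
 * register extents, the per-family PA_SC_RASTER_CONFIG value and the
 * final CLEAR_STATE packet (SI).
 */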
5313 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5314 {
5315         u32 count = 0, i;
5316         const struct cs_section_def *sect = NULL;
5317         const struct cs_extent_def *ext = NULL;
5318
5319         if (rdev->rlc.cs_data == NULL)
5320                 return;
5321         if (buffer == NULL)
5322                 return;
5323
5324         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5325         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5326
5327         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5328         buffer[count++] = cpu_to_le32(0x80000000);
5329         buffer[count++] = cpu_to_le32(0x80000000);
5330
5331         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5332                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5333                         if (sect->id == SECT_CONTEXT) {
5334                                 buffer[count++] =
5335                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5336                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5337                                 for (i = 0; i < ext->reg_count; i++)
5338                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5339                         } else {
5340                                 return;
5341                         }
5342                 }
5343         }
5344
5345         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5346         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5347         switch (rdev->family) {
5348         case CHIP_TAHITI:
5349         case CHIP_PITCAIRN:
5350                 buffer[count++] = cpu_to_le32(0x2a00126a);
5351                 break;
5352         case CHIP_VERDE:
5353                 buffer[count++] = cpu_to_le32(0x0000124a);
5354                 break;
5355         case CHIP_OLAND:
5356                 buffer[count++] = cpu_to_le32(0x00000082);
5357                 break;
5358         case CHIP_HAINAN:
5359                 buffer[count++] = cpu_to_le32(0x00000000);
5360                 break;
5361         default:
5362                 buffer[count++] = cpu_to_le32(0x00000000);
5363                 break;
5364         }
5365
5366         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5367         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5368
5369         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5370         buffer[count++] = cpu_to_le32(0);
5371 }
5372
5373 static void si_init_pg(struct radeon_device *rdev)
5374 {
5375         if (rdev->pg_flags) {
5376                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5377                         si_init_dma_pg(rdev);
5378                 }
5379                 si_init_ao_cu_mask(rdev);
5380                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5381                         si_init_gfx_cgpg(rdev);
5382                 }
5383                 si_enable_dma_pg(rdev, true);
5384                 si_enable_gfx_cgpg(rdev, true);
5385         } else {
5386                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5387                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5388         }
5389 }
5390
5391 static void si_fini_pg(struct radeon_device *rdev)
5392 {
5393         if (rdev->pg_flags) {
5394                 si_enable_dma_pg(rdev, false);
5395                 si_enable_gfx_cgpg(rdev, false);
5396         }
5397 }
5398
5399 /*
5400  * RLC
5401  */
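/**
 * si_rlc_reset - soft reset the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Toggles the RLC soft reset bit in GRBM_SOFT_RESET to reset the
 * RLC block (SI).
 */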
5402 void si_rlc_reset(struct radeon_device *rdev)
5403 {
5404         u32 tmp = RREG32(GRBM_SOFT_RESET);
5405
5406         tmp |= SOFT_RESET_RLC;
5407         WREG32(GRBM_SOFT_RESET, tmp);
5408         udelay(50);
5409         tmp &= ~SOFT_RESET_RLC;
5410         WREG32(GRBM_SOFT_RESET, tmp);
5411         udelay(50);
5412 }
5413
5414 static void si_rlc_stop(struct radeon_device *rdev)
5415 {
5416         WREG32(RLC_CNTL, 0);
5417
5418         si_enable_gui_idle_interrupt(rdev, false);
5419
5420         si_wait_for_rlc_serdes(rdev);
5421 }
5422
5423 static void si_rlc_start(struct radeon_device *rdev)
5424 {
5425         WREG32(RLC_CNTL, RLC_ENABLE);
5426
5427         si_enable_gui_idle_interrupt(rdev, true);
5428
5429         udelay(50);
5430 }
5431
5432 static bool si_lbpw_supported(struct radeon_device *rdev)
5433 {
5434         u32 tmp;
5435
5436         /* Enable LBPW only for DDR3 */
5437         tmp = RREG32(MC_SEQ_MISC0);
5438         if ((tmp & 0xF0000000) == 0xB0000000)
5439                 return true;
5440         return false;
5441 }
5442
5443 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5444 {
5445         u32 tmp;
5446
5447         tmp = RREG32(RLC_LB_CNTL);
5448         if (enable)
5449                 tmp |= LOAD_BALANCE_ENABLE;
5450         else
5451                 tmp &= ~LOAD_BALANCE_ENABLE;
5452         WREG32(RLC_LB_CNTL, tmp);
5453
5454         if (!enable) {
5455                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5456                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5457         }
5458 }
5459
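/**
 * si_rlc_resume - set up and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stops and resets the RLC, initializes powergating and clockgating,
 * loads the RLC microcode, enables LBPW where supported and starts
 * the RLC (SI).
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */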
5460 static int si_rlc_resume(struct radeon_device *rdev)
5461 {
5462         u32 i;
5463         const __be32 *fw_data;
5464
5465         if (!rdev->rlc_fw)
5466                 return -EINVAL;
5467
5468         si_rlc_stop(rdev);
5469
5470         si_rlc_reset(rdev);
5471
5472         si_init_pg(rdev);
5473
5474         si_init_cg(rdev);
5475
5476         WREG32(RLC_RL_BASE, 0);
5477         WREG32(RLC_RL_SIZE, 0);
5478         WREG32(RLC_LB_CNTL, 0);
5479         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5480         WREG32(RLC_LB_CNTR_INIT, 0);
5481         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5482
5483         WREG32(RLC_MC_CNTL, 0);
5484         WREG32(RLC_UCODE_CNTL, 0);
5485
5486         fw_data = (const __be32 *)rdev->rlc_fw->data;
5487         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5488                 WREG32(RLC_UCODE_ADDR, i);
5489                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5490         }
5491         WREG32(RLC_UCODE_ADDR, 0);
5492
5493         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5494
5495         si_rlc_start(rdev);
5496
5497         return 0;
5498 }
5499
5500 static void si_enable_interrupts(struct radeon_device *rdev)
5501 {
5502         u32 ih_cntl = RREG32(IH_CNTL);
5503         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5504
5505         ih_cntl |= ENABLE_INTR;
5506         ih_rb_cntl |= IH_RB_ENABLE;
5507         WREG32(IH_CNTL, ih_cntl);
5508         WREG32(IH_RB_CNTL, ih_rb_cntl);
5509         rdev->ih.enabled = true;
5510 }
5511
5512 static void si_disable_interrupts(struct radeon_device *rdev)
5513 {
5514         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5515         u32 ih_cntl = RREG32(IH_CNTL);
5516
5517         ih_rb_cntl &= ~IH_RB_ENABLE;
5518         ih_cntl &= ~ENABLE_INTR;
5519         WREG32(IH_RB_CNTL, ih_rb_cntl);
5520         WREG32(IH_CNTL, ih_cntl);
5521         /* set rptr, wptr to 0 */
5522         WREG32(IH_RB_RPTR, 0);
5523         WREG32(IH_RB_WPTR, 0);
5524         rdev->ih.enabled = false;
5525         rdev->ih.rptr = 0;
5526 }
5527
5528 static void si_disable_interrupt_state(struct radeon_device *rdev)
5529 {
5530         u32 tmp;
5531
5532         tmp = RREG32(CP_INT_CNTL_RING0) &
5533                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5534         WREG32(CP_INT_CNTL_RING0, tmp);
5535         WREG32(CP_INT_CNTL_RING1, 0);
5536         WREG32(CP_INT_CNTL_RING2, 0);
5537         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5538         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5539         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5540         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5541         WREG32(GRBM_INT_CNTL, 0);
5542         if (rdev->num_crtc >= 2) {
5543                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5544                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5545         }
5546         if (rdev->num_crtc >= 4) {
5547                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5548                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5549         }
5550         if (rdev->num_crtc >= 6) {
5551                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5552                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5553         }
5554
5555         if (rdev->num_crtc >= 2) {
5556                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5557                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5558         }
5559         if (rdev->num_crtc >= 4) {
5560                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5561                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5562         }
5563         if (rdev->num_crtc >= 6) {
5564                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5565                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5566         }
5567
5568         if (!ASIC_IS_NODCE(rdev)) {
5569                 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5570
5571                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5572                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5573                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5574                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5575                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5576                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5577                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5578                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5579                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5580                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5581                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5582                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5583         }
5584 }
5585
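/**
 * si_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring buffer, brings up the RLC, programs the
 * interrupt ring registers and wptr writeback, forces all interrupt
 * sources off and then enables the interrupt controller (SI).
 * Returns 0 for success, errors for failure.
 */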
5586 static int si_irq_init(struct radeon_device *rdev)
5587 {
5588         int ret = 0;
5589         int rb_bufsz;
5590         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5591
5592         /* allocate ring */
5593         ret = r600_ih_ring_alloc(rdev);
5594         if (ret)
5595                 return ret;
5596
5597         /* disable irqs */
5598         si_disable_interrupts(rdev);
5599
5600         /* init rlc */
5601         ret = si_rlc_resume(rdev);
5602         if (ret) {
5603                 r600_ih_ring_fini(rdev);
5604                 return ret;
5605         }
5606
5607         /* setup interrupt control */
5608         /* set dummy read address to ring address */
5609         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5610         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5611         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5612          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5613          */
5614         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5615         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5616         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5617         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5618
5619         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5620         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5621
5622         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5623                       IH_WPTR_OVERFLOW_CLEAR |
5624                       (rb_bufsz << 1));
5625
5626         if (rdev->wb.enabled)
5627                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5628
5629         /* set the writeback address whether it's enabled or not */
5630         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5631         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5632
5633         WREG32(IH_RB_CNTL, ih_rb_cntl);
5634
5635         /* set rptr, wptr to 0 */
5636         WREG32(IH_RB_RPTR, 0);
5637         WREG32(IH_RB_WPTR, 0);
5638
5639         /* Default settings for IH_CNTL (disabled at first) */
5640         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5641         /* RPTR_REARM only works if msi's are enabled */
5642         if (rdev->msi_enabled)
5643                 ih_cntl |= RPTR_REARM;
5644         WREG32(IH_CNTL, ih_cntl);
5645
5646         /* force the active interrupt state to all disabled */
5647         si_disable_interrupt_state(rdev);
5648
5649         pci_set_master(rdev->pdev);
5650
5651         /* enable irqs */
5652         si_enable_interrupts(rdev);
5653
5654         return ret;
5655 }
5656
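/**
 * si_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enables the interrupt sources currently requested in rdev->irq
 * (CP rings, DMA rings, vblank, hotplug, thermal) and writes the
 * corresponding interrupt control registers (SI).
 * Returns 0 for success, errors for failure.
 */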
5657 int si_irq_set(struct radeon_device *rdev)
5658 {
5659         u32 cp_int_cntl;
5660         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5661         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5662         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5663         u32 grbm_int_cntl = 0;
5664         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5665         u32 dma_cntl, dma_cntl1;
5666         u32 thermal_int = 0;
5667
5668         if (!rdev->irq.installed) {
5669                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5670                 return -EINVAL;
5671         }
5672         /* don't enable anything if the ih is disabled */
5673         if (!rdev->ih.enabled) {
5674                 si_disable_interrupts(rdev);
5675                 /* force the active interrupt state to all disabled */
5676                 si_disable_interrupt_state(rdev);
5677                 return 0;
5678         }
5679
5680         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5681                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5682
5683         if (!ASIC_IS_NODCE(rdev)) {
5684                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5685                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5686                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5687                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5688                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5689                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5690         }
5691
5692         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5693         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5694
5695         thermal_int = RREG32(CG_THERMAL_INT) &
5696                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5697
5698         /* enable CP interrupts on all rings */
5699         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5700                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5701                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5702         }
5703         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5704                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5705                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5706         }
5707         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5708                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5709                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5710         }
5711         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5712                 DRM_DEBUG("si_irq_set: sw int dma\n");
5713                 dma_cntl |= TRAP_ENABLE;
5714         }
5715
5716         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5717                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5718                 dma_cntl1 |= TRAP_ENABLE;
5719         }
5720         if (rdev->irq.crtc_vblank_int[0] ||
5721             atomic_read(&rdev->irq.pflip[0])) {
5722                 DRM_DEBUG("si_irq_set: vblank 0\n");
5723                 crtc1 |= VBLANK_INT_MASK;
5724         }
5725         if (rdev->irq.crtc_vblank_int[1] ||
5726             atomic_read(&rdev->irq.pflip[1])) {
5727                 DRM_DEBUG("si_irq_set: vblank 1\n");
5728                 crtc2 |= VBLANK_INT_MASK;
5729         }
5730         if (rdev->irq.crtc_vblank_int[2] ||
5731             atomic_read(&rdev->irq.pflip[2])) {
5732                 DRM_DEBUG("si_irq_set: vblank 2\n");
5733                 crtc3 |= VBLANK_INT_MASK;
5734         }
5735         if (rdev->irq.crtc_vblank_int[3] ||
5736             atomic_read(&rdev->irq.pflip[3])) {
5737                 DRM_DEBUG("si_irq_set: vblank 3\n");
5738                 crtc4 |= VBLANK_INT_MASK;
5739         }
5740         if (rdev->irq.crtc_vblank_int[4] ||
5741             atomic_read(&rdev->irq.pflip[4])) {
5742                 DRM_DEBUG("si_irq_set: vblank 4\n");
5743                 crtc5 |= VBLANK_INT_MASK;
5744         }
5745         if (rdev->irq.crtc_vblank_int[5] ||
5746             atomic_read(&rdev->irq.pflip[5])) {
5747                 DRM_DEBUG("si_irq_set: vblank 5\n");
5748                 crtc6 |= VBLANK_INT_MASK;
5749         }
5750         if (rdev->irq.hpd[0]) {
5751                 DRM_DEBUG("si_irq_set: hpd 1\n");
5752                 hpd1 |= DC_HPDx_INT_EN;
5753         }
5754         if (rdev->irq.hpd[1]) {
5755                 DRM_DEBUG("si_irq_set: hpd 2\n");
5756                 hpd2 |= DC_HPDx_INT_EN;
5757         }
5758         if (rdev->irq.hpd[2]) {
5759                 DRM_DEBUG("si_irq_set: hpd 3\n");
5760                 hpd3 |= DC_HPDx_INT_EN;
5761         }
5762         if (rdev->irq.hpd[3]) {
5763                 DRM_DEBUG("si_irq_set: hpd 4\n");
5764                 hpd4 |= DC_HPDx_INT_EN;
5765         }
5766         if (rdev->irq.hpd[4]) {
5767                 DRM_DEBUG("si_irq_set: hpd 5\n");
5768                 hpd5 |= DC_HPDx_INT_EN;
5769         }
5770         if (rdev->irq.hpd[5]) {
5771                 DRM_DEBUG("si_irq_set: hpd 6\n");
5772                 hpd6 |= DC_HPDx_INT_EN;
5773         }
5774
5775         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5776         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5777         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5778
5779         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5780         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5781
5782         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5783
5784         if (rdev->irq.dpm_thermal) {
5785                 DRM_DEBUG("dpm thermal\n");
5786                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5787         }
5788
5789         if (rdev->num_crtc >= 2) {
5790                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5791                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5792         }
5793         if (rdev->num_crtc >= 4) {
5794                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5795                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5796         }
5797         if (rdev->num_crtc >= 6) {
5798                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5799                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5800         }
5801
5802         if (rdev->num_crtc >= 2) {
5803                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5804                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5805         }
5806         if (rdev->num_crtc >= 4) {
5807                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5808                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5809         }
5810         if (rdev->num_crtc >= 6) {
5811                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5812                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5813         }
5814
5815         if (!ASIC_IS_NODCE(rdev)) {
5816                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5817                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5818                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5819                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5820                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5821                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5822         }
5823
5824         WREG32(CG_THERMAL_INT, thermal_int);
5825
5826         return 0;
5827 }
5828
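/**
 * si_irq_ack - acknowledge display interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Latches the display interrupt status registers into
 * rdev->irq.stat_regs and acks any pending pageflip, vblank, vline
 * and hotplug interrupts (SI).
 */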
5829 static inline void si_irq_ack(struct radeon_device *rdev)
5830 {
5831         u32 tmp;
5832
5833         if (ASIC_IS_NODCE(rdev))
5834                 return;
5835
5836         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5837         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5838         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5839         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5840         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5841         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5842         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5843         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5844         if (rdev->num_crtc >= 4) {
5845                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5846                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5847         }
5848         if (rdev->num_crtc >= 6) {
5849                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5850                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5851         }
5852
5853         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5854                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5855         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5856                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5857         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5858                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5859         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5860                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5861         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5862                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5863         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5864                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5865
5866         if (rdev->num_crtc >= 4) {
5867                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5868                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5869                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5870                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5871                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5872                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5873                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5874                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5875                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5876                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5877                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5878                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5879         }
5880
5881         if (rdev->num_crtc >= 6) {
5882                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5883                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5884                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5885                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5886                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5887                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5888                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5889                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5890                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5891                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5892                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5893                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5894         }
5895
5896         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5897                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5898                 tmp |= DC_HPDx_INT_ACK;
5899                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5900         }
5901         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5902                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5903                 tmp |= DC_HPDx_INT_ACK;
5904                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5905         }
5906         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5907                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5908                 tmp |= DC_HPDx_INT_ACK;
5909                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5910         }
5911         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5912                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5913                 tmp |= DC_HPDx_INT_ACK;
5914                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5915         }
5916         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5917                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5918                 tmp |= DC_HPDx_INT_ACK;
5919                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5920         }
5921         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5922                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5923                 tmp |= DC_HPDx_INT_ACK;
5924                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5925         }
5926 }
5927
5928 static void si_irq_disable(struct radeon_device *rdev)
5929 {
5930         si_disable_interrupts(rdev);
5931         /* Wait and acknowledge irq */
5932         mdelay(1);
5933         si_irq_ack(rdev);
5934         si_disable_interrupt_state(rdev);
5935 }
5936
5937 static void si_irq_suspend(struct radeon_device *rdev)
5938 {
5939         si_irq_disable(rdev);
5940         si_rlc_stop(rdev);
5941 }
5942
5943 static void si_irq_fini(struct radeon_device *rdev)
5944 {
5945         si_irq_suspend(rdev);
5946         r600_ih_ring_fini(rdev);
5947 }
5948
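/**
 * si_get_ih_wptr - get the current IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Fetches the current write pointer of the IH ring, either from the
 * writeback buffer or from the IH_RB_WPTR register, and handles ring
 * buffer overflows (SI).
 * Returns the masked write pointer.
 */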
5949 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5950 {
5951         u32 wptr, tmp;
5952
5953         if (rdev->wb.enabled)
5954                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5955         else
5956                 wptr = RREG32(IH_RB_WPTR);
5957
5958         if (wptr & RB_OVERFLOW) {
5959                 /* When a ring buffer overflow happens, start parsing interrupts
5960                  * from the last vector that was not overwritten (wptr + 16).
5961                  * Hopefully this should allow us to catch up.
5962                  */
5963                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
5964                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5965                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5966                 tmp = RREG32(IH_RB_CNTL);
5967                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5968                 WREG32(IH_RB_CNTL, tmp);
5969         }
5970         return (wptr & rdev->ih.ptr_mask);
5971 }
5972
5973 /*        SI IV Ring
5974  * Each IV ring entry is 128 bits:
5975  * [7:0]    - interrupt source id
5976  * [31:8]   - reserved
5977  * [59:32]  - interrupt source data
5978  * [63:60]  - reserved
5979  * [71:64]  - RINGID
5980  * [79:72]  - VMID
5981  * [127:80] - reserved
5982  */
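/* A minimal sketch (for illustration only) of how si_irq_process() below
 * decodes the first three dwords of one 16-byte IV ring entry; the fourth
 * dword is not read here:
 *
 *	src_id   = le32_to_cpu(ih.ring[ring_index + 0]) & 0xff;
 *	src_data = le32_to_cpu(ih.ring[ring_index + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ih.ring[ring_index + 2]) & 0xff;
 */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring, acks interrupts and dispatches them: vblank/vline,
 * hotplug, VM faults, CP and DMA fences and thermal events.  Hotplug and
 * thermal handling is deferred to work queues (SI).
 * Returns IRQ_HANDLED if interrupts were processed, IRQ_NONE otherwise.
 */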
5983 int si_irq_process(struct radeon_device *rdev)
5984 {
5985         u32 wptr;
5986         u32 rptr;
5987         u32 src_id, src_data, ring_id;
5988         u32 ring_index;
5989         bool queue_hotplug = false;
5990         bool queue_thermal = false;
5991         u32 status, addr;
5992
5993         if (!rdev->ih.enabled || rdev->shutdown)
5994                 return IRQ_NONE;
5995
5996         wptr = si_get_ih_wptr(rdev);
5997
5998 restart_ih:
5999         /* is somebody else already processing irqs? */
6000         if (atomic_xchg(&rdev->ih.lock, 1))
6001                 return IRQ_NONE;
6002
6003         rptr = rdev->ih.rptr;
6004         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6005
6006         /* Order reading of wptr vs. reading of IH ring data */
6007         rmb();
6008
6009         /* display interrupts */
6010         si_irq_ack(rdev);
6011
6012         while (rptr != wptr) {
6013                 /* wptr/rptr are in bytes! */
6014                 ring_index = rptr / 4;
6015                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6016                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6017                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6018
6019                 switch (src_id) {
6020                 case 1: /* D1 vblank/vline */
6021                         switch (src_data) {
6022                         case 0: /* D1 vblank */
6023                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6024                                         if (rdev->irq.crtc_vblank_int[0]) {
6025                                                 drm_handle_vblank(rdev->ddev, 0);
6026                                                 rdev->pm.vblank_sync = true;
6027                                                 wake_up(&rdev->irq.vblank_queue);
6028                                         }
6029                                         if (atomic_read(&rdev->irq.pflip[0]))
6030                                                 radeon_crtc_handle_flip(rdev, 0);
6031                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6032                                         DRM_DEBUG("IH: D1 vblank\n");
6033                                 }
6034                                 break;
6035                         case 1: /* D1 vline */
6036                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6037                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6038                                         DRM_DEBUG("IH: D1 vline\n");
6039                                 }
6040                                 break;
6041                         default:
6042                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6043                                 break;
6044                         }
6045                         break;
6046                 case 2: /* D2 vblank/vline */
6047                         switch (src_data) {
6048                         case 0: /* D2 vblank */
6049                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6050                                         if (rdev->irq.crtc_vblank_int[1]) {
6051                                                 drm_handle_vblank(rdev->ddev, 1);
6052                                                 rdev->pm.vblank_sync = true;
6053                                                 wake_up(&rdev->irq.vblank_queue);
6054                                         }
6055                                         if (atomic_read(&rdev->irq.pflip[1]))
6056                                                 radeon_crtc_handle_flip(rdev, 1);
6057                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6058                                         DRM_DEBUG("IH: D2 vblank\n");
6059                                 }
6060                                 break;
6061                         case 1: /* D2 vline */
6062                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6063                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6064                                         DRM_DEBUG("IH: D2 vline\n");
6065                                 }
6066                                 break;
6067                         default:
6068                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6069                                 break;
6070                         }
6071                         break;
6072                 case 3: /* D3 vblank/vline */
6073                         switch (src_data) {
6074                         case 0: /* D3 vblank */
6075                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6076                                         if (rdev->irq.crtc_vblank_int[2]) {
6077                                                 drm_handle_vblank(rdev->ddev, 2);
6078                                                 rdev->pm.vblank_sync = true;
6079                                                 wake_up(&rdev->irq.vblank_queue);
6080                                         }
6081                                         if (atomic_read(&rdev->irq.pflip[2]))
6082                                                 radeon_crtc_handle_flip(rdev, 2);
6083                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6084                                         DRM_DEBUG("IH: D3 vblank\n");
6085                                 }
6086                                 break;
6087                         case 1: /* D3 vline */
6088                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6089                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6090                                         DRM_DEBUG("IH: D3 vline\n");
6091                                 }
6092                                 break;
6093                         default:
6094                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6095                                 break;
6096                         }
6097                         break;
6098                 case 4: /* D4 vblank/vline */
6099                         switch (src_data) {
6100                         case 0: /* D4 vblank */
6101                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6102                                         if (rdev->irq.crtc_vblank_int[3]) {
6103                                                 drm_handle_vblank(rdev->ddev, 3);
6104                                                 rdev->pm.vblank_sync = true;
6105                                                 wake_up(&rdev->irq.vblank_queue);
6106                                         }
6107                                         if (atomic_read(&rdev->irq.pflip[3]))
6108                                                 radeon_crtc_handle_flip(rdev, 3);
6109                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6110                                         DRM_DEBUG("IH: D4 vblank\n");
6111                                 }
6112                                 break;
6113                         case 1: /* D4 vline */
6114                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6115                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6116                                         DRM_DEBUG("IH: D4 vline\n");
6117                                 }
6118                                 break;
6119                         default:
6120                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6121                                 break;
6122                         }
6123                         break;
6124                 case 5: /* D5 vblank/vline */
6125                         switch (src_data) {
6126                         case 0: /* D5 vblank */
6127                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6128                                         if (rdev->irq.crtc_vblank_int[4]) {
6129                                                 drm_handle_vblank(rdev->ddev, 4);
6130                                                 rdev->pm.vblank_sync = true;
6131                                                 wake_up(&rdev->irq.vblank_queue);
6132                                         }
6133                                         if (atomic_read(&rdev->irq.pflip[4]))
6134                                                 radeon_crtc_handle_flip(rdev, 4);
6135                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6136                                         DRM_DEBUG("IH: D5 vblank\n");
6137                                 }
6138                                 break;
6139                         case 1: /* D5 vline */
6140                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6141                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6142                                         DRM_DEBUG("IH: D5 vline\n");
6143                                 }
6144                                 break;
6145                         default:
6146                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6147                                 break;
6148                         }
6149                         break;
6150                 case 6: /* D6 vblank/vline */
6151                         switch (src_data) {
6152                         case 0: /* D6 vblank */
6153                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6154                                         if (rdev->irq.crtc_vblank_int[5]) {
6155                                                 drm_handle_vblank(rdev->ddev, 5);
6156                                                 rdev->pm.vblank_sync = true;
6157                                                 wake_up(&rdev->irq.vblank_queue);
6158                                         }
6159                                         if (atomic_read(&rdev->irq.pflip[5]))
6160                                                 radeon_crtc_handle_flip(rdev, 5);
6161                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6162                                         DRM_DEBUG("IH: D6 vblank\n");
6163                                 }
6164                                 break;
6165                         case 1: /* D6 vline */
6166                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6167                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6168                                         DRM_DEBUG("IH: D6 vline\n");
6169                                 }
6170                                 break;
6171                         default:
6172                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6173                                 break;
6174                         }
6175                         break;
6176                 case 42: /* HPD hotplug */
6177                         switch (src_data) {
6178                         case 0:
6179                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6180                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6181                                         queue_hotplug = true;
6182                                         DRM_DEBUG("IH: HPD1\n");
6183                                 }
6184                                 break;
6185                         case 1:
6186                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6187                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6188                                         queue_hotplug = true;
6189                                         DRM_DEBUG("IH: HPD2\n");
6190                                 }
6191                                 break;
6192                         case 2:
6193                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6194                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6195                                         queue_hotplug = true;
6196                                         DRM_DEBUG("IH: HPD3\n");
6197                                 }
6198                                 break;
6199                         case 3:
6200                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6201                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6202                                         queue_hotplug = true;
6203                                         DRM_DEBUG("IH: HPD4\n");
6204                                 }
6205                                 break;
6206                         case 4:
6207                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6208                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6209                                         queue_hotplug = true;
6210                                         DRM_DEBUG("IH: HPD5\n");
6211                                 }
6212                                 break;
6213                         case 5:
6214                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6215                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6216                                         queue_hotplug = true;
6217                                         DRM_DEBUG("IH: HPD6\n");
6218                                 }
6219                                 break;
6220                         default:
6221                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6222                                 break;
6223                         }
6224                         break;
6225                 case 146:
6226                 case 147:
6227                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6228                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6229                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6230                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6231                                 addr);
6232                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6233                                 status);
6234                         si_vm_decode_fault(rdev, status, addr);
6235                         /* reset addr and status */
6236                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6237                         break;
6238                 case 176: /* RINGID0 CP_INT */
6239                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6240                         break;
6241                 case 177: /* RINGID1 CP_INT */
6242                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6243                         break;
6244                 case 178: /* RINGID2 CP_INT */
6245                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6246                         break;
6247                 case 181: /* CP EOP event */
6248                         DRM_DEBUG("IH: CP EOP\n");
6249                         switch (ring_id) {
6250                         case 0:
6251                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6252                                 break;
6253                         case 1:
6254                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6255                                 break;
6256                         case 2:
6257                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6258                                 break;
6259                         }
6260                         break;
6261                 case 224: /* DMA trap event */
6262                         DRM_DEBUG("IH: DMA trap\n");
6263                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6264                         break;
6265                 case 230: /* thermal low to high */
6266                         DRM_DEBUG("IH: thermal low to high\n");
6267                         rdev->pm.dpm.thermal.high_to_low = false;
6268                         queue_thermal = true;
6269                         break;
6270                 case 231: /* thermal high to low */
6271                         DRM_DEBUG("IH: thermal high to low\n");
6272                         rdev->pm.dpm.thermal.high_to_low = true;
6273                         queue_thermal = true;
6274                         break;
6275                 case 233: /* GUI IDLE */
6276                         DRM_DEBUG("IH: GUI idle\n");
6277                         break;
6278                 case 244: /* DMA1 trap event */
6279                         DRM_DEBUG("IH: DMA1 trap\n");
6280                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6281                         break;
6282                 default:
6283                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6284                         break;
6285                 }
6286
6287                 /* wptr/rptr are in bytes! */
6288                 rptr += 16;
6289                 rptr &= rdev->ih.ptr_mask;
6290         }
6291         if (queue_hotplug)
6292                 schedule_work(&rdev->hotplug_work);
6293         if (queue_thermal && rdev->pm.dpm_enabled)
6294                 schedule_work(&rdev->pm.dpm.thermal.work);
6295         rdev->ih.rptr = rptr;
6296         WREG32(IH_RB_RPTR, rdev->ih.rptr);
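        /* release the processing lock taken at the top of the handler */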
6297         atomic_set(&rdev->ih.lock, 0);
6298
6299         /* make sure wptr hasn't changed while processing */
6300         wptr = si_get_ih_wptr(rdev);
6301         if (wptr != rptr)
6302                 goto restart_ih;
6303
6304         return IRQ_HANDLED;
6305 }
6306
6307 /*
6308  * startup/shutdown callbacks
6309  */
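/**
 * si_startup - program the asic and start the requested rings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, loads microcode, sets up the GART, RLC, writeback,
 * IRQs and rings (CP, DMA and optionally UVD), then initializes the
 * IB pool, VM manager and audio (SI).
 * Returns 0 on success, error code on failure.
 */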
6310 static int si_startup(struct radeon_device *rdev)
6311 {
6312         struct radeon_ring *ring;
6313         int r;
6314
6315         /* enable pcie gen2/3 link */
6316         si_pcie_gen3_enable(rdev);
6317         /* enable aspm */
6318         si_program_aspm(rdev);
6319
6320         /* scratch needs to be initialized before MC */
6321         r = r600_vram_scratch_init(rdev);
6322         if (r)
6323                 return r;
6324
6325         si_mc_program(rdev);
6326
6327         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6328             !rdev->rlc_fw || !rdev->mc_fw) {
6329                 r = si_init_microcode(rdev);
6330                 if (r) {
6331                         DRM_ERROR("Failed to load firmware!\n");
6332                         return r;
6333                 }
6334         }
6335
6336         r = si_mc_load_microcode(rdev);
6337         if (r) {
6338                 DRM_ERROR("Failed to load MC firmware!\n");
6339                 return r;
6340         }
6341
6342         r = si_pcie_gart_enable(rdev);
6343         if (r)
6344                 return r;
6345         si_gpu_init(rdev);
6346
6347         /* allocate rlc buffers */
6348         if (rdev->family == CHIP_VERDE) {
6349                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6350                 rdev->rlc.reg_list_size =
6351                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6352         }
6353         rdev->rlc.cs_data = si_cs_data;
6354         r = sumo_rlc_init(rdev);
6355         if (r) {
6356                 DRM_ERROR("Failed to init rlc BOs!\n");
6357                 return r;
6358         }
6359
6360         /* allocate wb buffer */
6361         r = radeon_wb_init(rdev);
6362         if (r)
6363                 return r;
6364
6365         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6366         if (r) {
6367                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6368                 return r;
6369         }
6370
6371         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6372         if (r) {
6373                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6374                 return r;
6375         }
6376
6377         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6378         if (r) {
6379                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6380                 return r;
6381         }
6382
6383         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6384         if (r) {
6385                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6386                 return r;
6387         }
6388
6389         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6390         if (r) {
6391                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6392                 return r;
6393         }
6394
6395         if (rdev->has_uvd) {
6396                 r = uvd_v2_2_resume(rdev);
6397                 if (!r) {
6398                         r = radeon_fence_driver_start_ring(rdev,
6399                                                            R600_RING_TYPE_UVD_INDEX);
6400                         if (r)
6401                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6402                 }
6403                 if (r)
6404                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6405         }
6406
6407         /* Enable IRQ */
6408         if (!rdev->irq.installed) {
6409                 r = radeon_irq_kms_init(rdev);
6410                 if (r)
6411                         return r;
6412         }
6413
6414         r = si_irq_init(rdev);
6415         if (r) {
6416                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6417                 radeon_irq_kms_fini(rdev);
6418                 return r;
6419         }
6420         si_irq_set(rdev);
6421
6422         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6423         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6424                              CP_RB0_RPTR, CP_RB0_WPTR,
6425                              RADEON_CP_PACKET2);
6426         if (r)
6427                 return r;
6428
6429         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6430         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6431                              CP_RB1_RPTR, CP_RB1_WPTR,
6432                              RADEON_CP_PACKET2);
6433         if (r)
6434                 return r;
6435
6436         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6437         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6438                              CP_RB2_RPTR, CP_RB2_WPTR,
6439                              RADEON_CP_PACKET2);
6440         if (r)
6441                 return r;
6442
6443         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6444         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6445                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6446                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6447                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6448         if (r)
6449                 return r;
6450
6451         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6452         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6453                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6454                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6455                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6456         if (r)
6457                 return r;
6458
6459         r = si_cp_load_microcode(rdev);
6460         if (r)
6461                 return r;
6462         r = si_cp_resume(rdev);
6463         if (r)
6464                 return r;
6465
6466         r = cayman_dma_resume(rdev);
6467         if (r)
6468                 return r;
6469
6470         if (rdev->has_uvd) {
6471                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6472                 if (ring->ring_size) {
6473                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6474                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6475                                              RADEON_CP_PACKET2);
6476                         if (!r)
6477                                 r = uvd_v1_0_init(rdev);
6478                         if (r)
6479                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6480                 }
6481         }
6482
6483         r = radeon_ib_pool_init(rdev);
6484         if (r) {
6485                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6486                 return r;
6487         }
6488
6489         r = radeon_vm_manager_init(rdev);
6490         if (r) {
6491                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6492                 return r;
6493         }
6494
6495         r = dce6_audio_init(rdev);
6496         if (r)
6497                 return r;
6498
6499         return 0;
6500 }
6501
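/**
 * si_resume - resume the asic after suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM BIOS, restores the golden registers
 * and runs si_startup() again (SI).
 * Returns 0 on success, error code on failure.
 */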
6502 int si_resume(struct radeon_device *rdev)
6503 {
6504         int r;
6505
6506         /* Do not reset the GPU before posting; on rv770 and later hardware,
6507          * unlike r500, posting performs the tasks needed to bring the GPU
6508          * back into a good state.
6509          */
6510         /* post card */
6511         atom_asic_init(rdev->mode_info.atom_context);
6512
6513         /* init golden registers */
6514         si_init_golden_registers(rdev);
6515
6516         rdev->accel_working = true;
6517         r = si_startup(rdev);
6518         if (r) {
6519                 DRM_ERROR("si startup failed on resume\n");
6520                 rdev->accel_working = false;
6521                 return r;
6522         }
6523
6524         return r;
6526 }
6527
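/**
 * si_suspend - tear down hw state for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, the CP and DMA engines and UVD, then
 * disables power/clock gating, interrupts, writeback and the GART (SI).
 * Always returns 0.
 */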
6528 int si_suspend(struct radeon_device *rdev)
6529 {
6530         dce6_audio_fini(rdev);
6531         radeon_vm_manager_fini(rdev);
6532         si_cp_enable(rdev, false);
6533         cayman_dma_stop(rdev);
6534         if (rdev->has_uvd) {
6535                 uvd_v1_0_fini(rdev);
6536                 radeon_uvd_suspend(rdev);
6537         }
6538         si_fini_pg(rdev);
6539         si_fini_cg(rdev);
6540         si_irq_suspend(rdev);
6541         radeon_wb_disable(rdev);
6542         si_pcie_gart_disable(rdev);
6543         return 0;
6544 }
6545
6546 /* The plan is to move initialization into this function and use
6547  * helper functions so that radeon_device_init does little more
6548  * than call the ASIC-specific functions. This should also
6549  * allow us to remove a number of callback functions such
6550  * as vram_info.
6551  */
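/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Reads the BIOS, posts the card if necessary, initializes the memory
 * controller, rings and IH, then calls si_startup() (SI).
 * Returns 0 on success, error code on failure.
 */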
6552 int si_init(struct radeon_device *rdev)
6553 {
6554         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6555         int r;
6556
6557         /* Read BIOS */
6558         if (!radeon_get_bios(rdev)) {
6559                 if (ASIC_IS_AVIVO(rdev))
6560                         return -EINVAL;
6561         }
6562         /* Must be an ATOMBIOS */
6563         if (!rdev->is_atom_bios) {
6564                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6565                 return -EINVAL;
6566         }
6567         r = radeon_atombios_init(rdev);
6568         if (r)
6569                 return r;
6570
6571         /* Post card if necessary */
6572         if (!radeon_card_posted(rdev)) {
6573                 if (!rdev->bios) {
6574                         dev_err(rdev->dev, "Card not posted and no BIOS - aborting\n");
6575                         return -EINVAL;
6576                 }
6577                 DRM_INFO("GPU not posted. posting now...\n");
6578                 atom_asic_init(rdev->mode_info.atom_context);
6579         }
6580         /* init golden registers */
6581         si_init_golden_registers(rdev);
6582         /* Initialize scratch registers */
6583         si_scratch_init(rdev);
6584         /* Initialize surface registers */
6585         radeon_surface_init(rdev);
6586         /* Initialize clocks */
6587         radeon_get_clock_info(rdev->ddev);
6588
6589         /* Fence driver */
6590         r = radeon_fence_driver_init(rdev);
6591         if (r)
6592                 return r;
6593
6594         /* initialize memory controller */
6595         r = si_mc_init(rdev);
6596         if (r)
6597                 return r;
6598         /* Memory manager */
6599         r = radeon_bo_init(rdev);
6600         if (r)
6601                 return r;
6602
6603         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6604         ring->ring_obj = NULL;
6605         r600_ring_init(rdev, ring, 1024 * 1024);
6606
6607         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6608         ring->ring_obj = NULL;
6609         r600_ring_init(rdev, ring, 1024 * 1024);
6610
6611         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6612         ring->ring_obj = NULL;
6613         r600_ring_init(rdev, ring, 1024 * 1024);
6614
6615         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6616         ring->ring_obj = NULL;
6617         r600_ring_init(rdev, ring, 64 * 1024);
6618
6619         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6620         ring->ring_obj = NULL;
6621         r600_ring_init(rdev, ring, 64 * 1024);
6622
6623         if (rdev->has_uvd) {
6624                 r = radeon_uvd_init(rdev);
6625                 if (!r) {
6626                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6627                         ring->ring_obj = NULL;
6628                         r600_ring_init(rdev, ring, 4096);
6629                 }
6630         }
6631
6632         rdev->ih.ring_obj = NULL;
6633         r600_ih_ring_init(rdev, 64 * 1024);
6634
6635         r = r600_pcie_gart_init(rdev);
6636         if (r)
6637                 return r;
6638
6639         rdev->accel_working = true;
6640         r = si_startup(rdev);
6641         if (r) {
6642                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6643                 si_cp_fini(rdev);
6644                 cayman_dma_fini(rdev);
6645                 si_irq_fini(rdev);
6646                 sumo_rlc_fini(rdev);
6647                 radeon_wb_fini(rdev);
6648                 radeon_ib_pool_fini(rdev);
6649                 radeon_vm_manager_fini(rdev);
6650                 radeon_irq_kms_fini(rdev);
6651                 si_pcie_gart_fini(rdev);
6652                 rdev->accel_working = false;
6653         }
6654
6655         /* Don't start up if the MC ucode is missing.
6656          * The default clocks and voltages before the MC ucode
6657          * is loaded are not sufficient for advanced operations.
6658          */
6659         if (!rdev->mc_fw) {
6660                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6661                 return -EINVAL;
6662         }
6663
6664         return 0;
6665 }
6666
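/**
 * si_fini - asic specific driver and hw tear down
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the CP and DMA engines, power/clock gating, IRQs, RLC,
 * writeback, VM manager, IB pool, UVD, GART and the remaining core
 * driver state (SI).
 */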
6667 void si_fini(struct radeon_device *rdev)
6668 {
6669         si_cp_fini(rdev);
6670         cayman_dma_fini(rdev);
6671         si_fini_pg(rdev);
6672         si_fini_cg(rdev);
6673         si_irq_fini(rdev);
6674         sumo_rlc_fini(rdev);
6675         radeon_wb_fini(rdev);
6676         radeon_vm_manager_fini(rdev);
6677         radeon_ib_pool_fini(rdev);
6678         radeon_irq_kms_fini(rdev);
6679         if (rdev->has_uvd) {
6680                 uvd_v1_0_fini(rdev);
6681                 radeon_uvd_fini(rdev);
6682         }
6683         si_pcie_gart_fini(rdev);
6684         r600_vram_scratch_fini(rdev);
6685         radeon_gem_fini(rdev);
6686         radeon_fence_driver_fini(rdev);
6687         radeon_bo_fini(rdev);
6688         radeon_atombios_fini(rdev);
6689         kfree(rdev->bios);
6690         rdev->bios = NULL;
6691 }
6692
6693 /**
6694  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6695  *
6696  * @rdev: radeon_device pointer
6697  *
6698  * Fetches a GPU clock counter snapshot (SI).
6699  * Returns the 64 bit clock counter snapshot.
6700  */
6701 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6702 {
6703         uint64_t clock;
6704
6705         mutex_lock(&rdev->gpu_clock_mutex);
6706         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6707         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6708                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6709         mutex_unlock(&rdev->gpu_clock_mutex);
6710         return clock;
6711 }
6712
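/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK
 * @dclk: requested DCLK
 *
 * Puts the UPLL into bypass, computes new dividers and switches VCLK
 * and DCLK back to the PLL outputs once the control request is
 * acknowledged.  If either clock is zero the PLL is left in bypass
 * and put to sleep (SI).
 * Returns 0 on success, error code on failure.
 */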
6713 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6714 {
6715         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6716         int r;
6717
6718         /* bypass vclk and dclk with bclk */
6719         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6720                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6721                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6722
6723         /* put PLL in bypass mode */
6724         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6725
6726         if (!vclk || !dclk) {
6727                 /* keep the Bypass mode, put PLL to sleep */
6728                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6729                 return 0;
6730         }
6731
6732         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6733                                           16384, 0x03FFFFFF, 0, 128, 5,
6734                                           &fb_div, &vclk_div, &dclk_div);
6735         if (r)
6736                 return r;
6737
6738         /* set RESET_ANTI_MUX to 0 */
6739         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6740
6741         /* set VCO_MODE to 1 */
6742         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6743
6744         /* toggle UPLL_SLEEP to 1 then back to 0 */
6745         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6746         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6747
6748         /* deassert UPLL_RESET */
6749         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6750
6751         mdelay(1);
6752
6753         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6754         if (r)
6755                 return r;
6756
6757         /* assert UPLL_RESET again */
6758         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6759
6760         /* disable spread spectrum. */
6761         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6762
6763         /* set feedback divider */
6764         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6765
6766         /* set ref divider to 0 */
6767         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6768
6769         if (fb_div < 307200)
6770                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6771         else
6772                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6773
6774         /* set PDIV_A and PDIV_B */
6775         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6776                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6777                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6778
6779         /* give the PLL some time to settle */
6780         mdelay(15);
6781
6782         /* deassert PLL_RESET */
6783         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6784
6785         mdelay(15);
6786
6787         /* switch from bypass mode to normal mode */
6788         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6789
6790         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6791         if (r)
6792                 return r;
6793
6794         /* switch VCLK and DCLK selection */
6795         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6796                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6797                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6798
6799         mdelay(100);
6800
6801         return 0;
6802 }
6803
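/**
 * si_pcie_gen3_enable - enable higher PCIE link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Retrains the PCIE link to gen2/gen3 speeds when both the GPU and
 * the upstream bridge support them.  Does nothing for IGPs, non-PCIE
 * parts, or when disabled with radeon.pcie_gen2=0 (SI).
 */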
6804 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6805 {
6806         struct pci_dev *root = rdev->pdev->bus->self;
6807         int bridge_pos, gpu_pos;
6808         u32 speed_cntl, mask, current_data_rate;
6809         int ret, i;
6810         u16 tmp16;
6811
6812         if (radeon_pcie_gen2 == 0)
6813                 return;
6814
6815         if (rdev->flags & RADEON_IS_IGP)
6816                 return;
6817
6818         if (!(rdev->flags & RADEON_IS_PCIE))
6819                 return;
6820
6821         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6822         if (ret != 0)
6823                 return;
6824
6825         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6826                 return;
6827
6828         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6829         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6830                 LC_CURRENT_DATA_RATE_SHIFT;
6831         if (mask & DRM_PCIE_SPEED_80) {
6832                 if (current_data_rate == 2) {
6833                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6834                         return;
6835                 }
6836                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6837         } else if (mask & DRM_PCIE_SPEED_50) {
6838                 if (current_data_rate == 1) {
6839                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6840                         return;
6841                 }
6842                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6843         }
6844
6845         bridge_pos = pci_pcie_cap(root);
6846         if (!bridge_pos)
6847                 return;
6848
6849         gpu_pos = pci_pcie_cap(rdev->pdev);
6850         if (!gpu_pos)
6851                 return;
6852
6853         if (mask & DRM_PCIE_SPEED_80) {
6854                 /* re-try equalization if gen3 is not already enabled */
6855                 if (current_data_rate != 2) {
6856                         u16 bridge_cfg, gpu_cfg;
6857                         u16 bridge_cfg2, gpu_cfg2;
6858                         u32 max_lw, current_lw, tmp;
6859
6860                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6861                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6862
6863                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6864                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6865
6866                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6867                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6868
6869                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6870                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6871                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6872
6873                         if (current_lw < max_lw) {
6874                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6875                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
6876                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6877                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6878                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6879                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6880                                 }
6881                         }
6882
6883                         for (i = 0; i < 10; i++) {
6884                                 /* check status */
6885                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
6886                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
6887                                         break;
6888
6889                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6890                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6891
6892                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
6893                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
6894
6895                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6896                                 tmp |= LC_SET_QUIESCE;
6897                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6898
6899                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6900                                 tmp |= LC_REDO_EQ;
6901                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6902
6903                                 mdelay(100);
6904
6905                                 /* linkctl: restore the HAWD setting saved on the bridge and GPU */
6906                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
6907                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6908                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
6909                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6910
6911                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
6912                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6913                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
6914                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6915
6916                                 /* linkctl2: restore the link control 2 fields saved before equalization */
6917                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
6918                                 tmp16 &= ~((1 << 4) | (7 << 9));
6919                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
6920                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
6921
6922                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6923                                 tmp16 &= ~((1 << 4) | (7 << 9));
6924                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
6925                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6926
6927                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6928                                 tmp &= ~LC_SET_QUIESCE;
6929                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6930                         }
6931                 }
6932         }
6933
6934         /* set the link speed */
6935         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
6936         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
6937         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6938
6939         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6940         tmp16 &= ~0xf;
6941         if (mask & DRM_PCIE_SPEED_80)
6942                 tmp16 |= 3; /* gen3 */
6943         else if (mask & DRM_PCIE_SPEED_50)
6944                 tmp16 |= 2; /* gen2 */
6945         else
6946                 tmp16 |= 1; /* gen1 */
6947         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6948
6949         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6950         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
6951         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6952
6953         for (i = 0; i < rdev->usec_timeout; i++) {
6954                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6955                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
6956                         break;
6957                 udelay(1);
6958         }
6959 }
6960
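/**
 * si_program_aspm - configure PCIE ASPM
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, PLL power down in L1 and
 * CLKREQ handling.  Does nothing for non-PCIE parts or when disabled
 * with radeon.aspm=0 (SI).
 */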
6961 static void si_program_aspm(struct radeon_device *rdev)
6962 {
6963         u32 data, orig;
6964         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6965         bool disable_clkreq = false;
6966
6967         if (radeon_aspm == 0)
6968                 return;
6969
6970         if (!(rdev->flags & RADEON_IS_PCIE))
6971                 return;
6972
6973         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6974         data &= ~LC_XMIT_N_FTS_MASK;
6975         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
6976         if (orig != data)
6977                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
6978
6979         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
6980         data |= LC_GO_TO_RECOVERY;
6981         if (orig != data)
6982                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
6983
6984         orig = data = RREG32_PCIE(PCIE_P_CNTL);
6985         data |= P_IGNORE_EDB_ERR;
6986         if (orig != data)
6987                 WREG32_PCIE(PCIE_P_CNTL, data);
6988
6989         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6990         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
6991         data |= LC_PMI_TO_L1_DIS;
6992         if (!disable_l0s)
6993                 data |= LC_L0S_INACTIVITY(7);
6994
6995         if (!disable_l1) {
6996                 data |= LC_L1_INACTIVITY(7);
6997                 data &= ~LC_PMI_TO_L1_DIS;
6998                 if (orig != data)
6999                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7000
7001                 if (!disable_plloff_in_l1) {
7002                         bool clk_req_support;
7003
7004                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7005                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7006                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7007                         if (orig != data)
7008                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7009
7010                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7011                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7012                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7013                         if (orig != data)
7014                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7015
7016                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7017                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7018                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7019                         if (orig != data)
7020                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7021
7022                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7023                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7024                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7025                         if (orig != data)
7026                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7027
7028                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7029                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7030                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7031                                 if (orig != data)
7032                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7033
7034                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7035                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7036                                 if (orig != data)
7037                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7038
7039                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7040                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7041                                 if (orig != data)
7042                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7043
7044                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7045                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7046                                 if (orig != data)
7047                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7048
7049                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7050                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7051                                 if (orig != data)
7052                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7053
7054                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7055                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7056                                 if (orig != data)
7057                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7058
7059                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7060                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7061                                 if (orig != data)
7062                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7063
7064                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7065                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7066                                 if (orig != data)
7067                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7068                         }
7069                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7070                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7071                         data |= LC_DYN_LANES_PWR_STATE(3);
7072                         if (orig != data)
7073                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7074
7075                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7076                         data &= ~LS2_EXIT_TIME_MASK;
7077                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7078                                 data |= LS2_EXIT_TIME(5);
7079                         if (orig != data)
7080                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7081
7082                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7083                         data &= ~LS2_EXIT_TIME_MASK;
7084                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7085                                 data |= LS2_EXIT_TIME(5);
7086                         if (orig != data)
7087                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7088
7089                         if (!disable_clkreq) {
7090                                 struct pci_dev *root = rdev->pdev->bus->self;
7091                                 u32 lnkcap;
7092
7093                                 clk_req_support = false;
7094                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7095                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7096                                         clk_req_support = true;
7097                         } else {
7098                                 clk_req_support = false;
7099                         }
7100
7101                         if (clk_req_support) {
7102                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7103                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7104                                 if (orig != data)
7105                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7106
7107                                 orig = data = RREG32(THM_CLK_CNTL);
7108                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7109                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7110                                 if (orig != data)
7111                                         WREG32(THM_CLK_CNTL, data);
7112
7113                                 orig = data = RREG32(MISC_CLK_CNTL);
7114                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7115                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7116                                 if (orig != data)
7117                                         WREG32(MISC_CLK_CNTL, data);
7118
7119                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7120                                 data &= ~BCLK_AS_XCLK;
7121                                 if (orig != data)
7122                                         WREG32(CG_CLKPIN_CNTL, data);
7123
7124                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7125                                 data &= ~FORCE_BIF_REFCLK_EN;
7126                                 if (orig != data)
7127                                         WREG32(CG_CLKPIN_CNTL_2, data);
7128
7129                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7130                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7131                                 data |= MPLL_CLKOUT_SEL(4);
7132                                 if (orig != data)
7133                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7134
7135                                 orig = data = RREG32(SPLL_CNTL_MODE);
7136                                 data &= ~SPLL_REFCLK_SEL_MASK;
7137                                 if (orig != data)
7138                                         WREG32(SPLL_CNTL_MODE, data);
7139                         }
7140                 }
7141         } else {
7142                 if (orig != data)
7143                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7144         }
7145
7146         orig = data = RREG32_PCIE(PCIE_CNTL2);
7147         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7148         if (orig != data)
7149                 WREG32_PCIE(PCIE_CNTL2, data);
7150
7151         if (!disable_l0s) {
7152                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7153                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7154                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7155                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7156                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7157                                 data &= ~LC_L0S_INACTIVITY_MASK;
7158                                 if (orig != data)
7159                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7160                         }
7161                 }
7162         }
7163 }