/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                         bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

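/*
 * RLC save/restore register list for Verde, handed to the common RLC setup
 * code (sumo_rlc_init()). Each entry appears to pack an instance/broadcast
 * select in the upper 16 bits with a dword register offset in the lower 16,
 * followed by a placeholder value; this layout is inferred from usage rather
 * than from hardware documentation.
 */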
static const u32 verde_rlc_save_restore_register_list[] =
{
        (0x8000 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x98f4 >> 2),
        0x00000000,
        (0x8000 << 16) | (0xe80 >> 2),
        0x00000000,
        (0x8040 << 16) | (0xe80 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x8040 << 16) | (0x89bc >> 2),
        0x00000000,
        (0x8000 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x8c1c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x98f0 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xe7c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9148 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9148 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9150 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x897c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8d8c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac54 >> 2),
        0x00000000,
        0x3,
        (0x9c00 << 16) | (0x98f8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9910 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9914 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9918 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x991c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9920 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9924 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9928 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x992c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9930 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9934 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9938 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x993c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9940 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9944 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9948 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x994c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9950 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9954 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9958 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x995c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9960 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9964 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9968 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x996c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9970 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9974 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9978 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x997c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9980 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9984 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9988 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x998c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c00 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c04 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c08 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9b7c >> 2),
        0x00000000,
        (0x8000 << 16) | (0xe84 >> 2),
        0x00000000,
        (0x8040 << 16) | (0xe84 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x89c0 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x914c >> 2),
        0x00000000,
        (0x8040 << 16) | (0x914c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x8c20 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9354 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9354 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9060 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9364 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9100 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x913c >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e0 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e4 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e0 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e4 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x90e8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8bcc >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8b24 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88c4 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e50 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8c0c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e58 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8e5c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9508 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x950c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9494 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac0c >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac10 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xae00 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0xac08 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88d4 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88c8 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x88cc >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x89b0 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8b10 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x8a14 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9830 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9834 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9838 >> 2),
        0x00000000,
        (0x9c00 << 16) | (0x9a10 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8000 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8001 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8001 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8040 << 16) | (0x9874 >> 2),
        0x00000000,
        (0x8041 << 16) | (0x9870 >> 2),
        0x00000000,
        (0x8041 << 16) | (0x9874 >> 2),
        0x00000000,
        0x00000000
};

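/*
 * Per-ASIC "golden" register tables: {register offset, mask, value} triples
 * handed to radeon_program_register_sequence() from si_init_golden_registers()
 * below.
 */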
static const u32 tahiti_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4,
        0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x277c, 0x00000003, 0x000007ff,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x00000200, 0x000002fb,
        0xac10, 0xffffffff, 0x0000543b,
        0xac0c, 0xffffffff, 0xa9210876,
        0x88d0, 0xffffffff, 0x000fff40,
        0x88d4, 0x0000001f, 0x00000010,
        0x1410, 0x20000000, 0x20fffed8,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
        0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601004,
        0xc47c, 0xffffffff, 0x10102020,
        0xc488, 0xffffffff, 0x01000020,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x2a00126a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f7,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x32761054,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x033f1005,
        0xc47c, 0xffffffff, 0x10808020,
        0xc488, 0xffffffff, 0x00800008,
        0xc314, 0xffffffff, 0x00001000,
        0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x28350, 0x3f3f3fff, 0x0000124a,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9100, 0x07ffffff, 0x03000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e88, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x8e84, 0x01ff1f3f, 0x00000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac14, 0x000003ff, 0x00000003,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0xac0c, 0xffffffff, 0x00001032,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
        0xc424, 0xffffffff, 0x00601005,
        0xc47c, 0xffffffff, 0x10104040,
        0xc488, 0xffffffff, 0x0100000a,
        0xc314, 0xffffffff, 0x00000800,
        0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0xd030, 0x000300c0, 0x00800040,
        0xd830, 0x000300c0, 0x00800040,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0x00200000, 0x50100000,
        0x7030, 0x31000311, 0x00000011,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x07ffffff, 0x03000000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
        0x9a10, 0x00010000, 0x00018208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xd0c0, 0xff000fff, 0x00000100,
        0xd030, 0x000300c0, 0x00800040,
        0xd8c0, 0xff000fff, 0x00000100,
        0xd830, 0x000300c0, 0x00800040,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000001f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x8b10, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x4e000000,
        0x28350, 0x3f3f3fff, 0x00000000,
        0x30, 0x000000ff, 0x0040,
        0x34, 0x00000040, 0x00004040,
        0x9100, 0x03e00000, 0x03600000,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f1,
        0xac10, 0xffffffff, 0x00000000,
        0xac0c, 0xffffffff, 0x00003210,
        0x88d4, 0x0000001f, 0x00000010,
        0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
        0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x91d8, 0xffffffff, 0x00070006,
        0x91dc, 0xffffffff, 0x00090008,
        0x91e0, 0xffffffff, 0x0000000c,
        0x91e4, 0xffffffff, 0x000b000a,
        0x91e8, 0xffffffff, 0x000e000d,
        0x91ec, 0xffffffff, 0x00080007,
        0x91f0, 0xffffffff, 0x000a0009,
        0x91f4, 0xffffffff, 0x0000000d,
        0x91f8, 0xffffffff, 0x000c000b,
        0x91fc, 0xffffffff, 0x000f000e,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9264, 0xffffffff, 0x000e000d,
        0x9268, 0xffffffff, 0x0010000f,
        0x926c, 0xffffffff, 0x00000013,
        0x9270, 0xffffffff, 0x00120011,
        0x9274, 0xffffffff, 0x00150014,
        0x9278, 0xffffffff, 0x000f000e,
        0x927c, 0xffffffff, 0x00110010,
        0x9280, 0xffffffff, 0x00000014,
        0x9284, 0xffffffff, 0x00130012,
        0x9288, 0xffffffff, 0x00160015,
        0x928c, 0xffffffff, 0x0010000f,
        0x9290, 0xffffffff, 0x00120011,
        0x9294, 0xffffffff, 0x00000015,
        0x9298, 0xffffffff, 0x00140013,
        0x929c, 0xffffffff, 0x00170016,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x9200, 0xffffffff, 0x00090008,
        0x9204, 0xffffffff, 0x000b000a,
        0x9208, 0xffffffff, 0x000c000f,
        0x920c, 0xffffffff, 0x000e000d,
        0x9210, 0xffffffff, 0x00110010,
        0x9214, 0xffffffff, 0x000a0009,
        0x9218, 0xffffffff, 0x000c000b,
        0x921c, 0xffffffff, 0x0000000f,
        0x9220, 0xffffffff, 0x000e000d,
        0x9224, 0xffffffff, 0x00110010,
        0x9228, 0xffffffff, 0x000b000a,
        0x922c, 0xffffffff, 0x000d000c,
        0x9230, 0xffffffff, 0x00000010,
        0x9234, 0xffffffff, 0x000f000e,
        0x9238, 0xffffffff, 0x00120011,
        0x923c, 0xffffffff, 0x000c000b,
        0x9240, 0xffffffff, 0x000e000d,
        0x9244, 0xffffffff, 0x00000011,
        0x9248, 0xffffffff, 0x0010000f,
        0x924c, 0xffffffff, 0x00130012,
        0x9250, 0xffffffff, 0x000d000c,
        0x9254, 0xffffffff, 0x000f000e,
        0x9258, 0xffffffff, 0x00100013,
        0x925c, 0xffffffff, 0x00120011,
        0x9260, 0xffffffff, 0x00150014,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};

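/*
 * Power-gating init sequence for Verde, programmed as {register offset, mask,
 * value} triples via radeon_program_register_sequence() in the CHIP_VERDE case
 * of si_init_golden_registers() below.
 */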
static u32 verde_pg_init[] =
{
        0x353c, 0xffffffff, 0x40000,
        0x3538, 0xffffffff, 0x200010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x7007,
        0x3538, 0xffffffff, 0x300010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x400000,
        0x3538, 0xffffffff, 0x100010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x120200,
        0x3538, 0xffffffff, 0x500010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x1e1e16,
        0x3538, 0xffffffff, 0x600010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x171f1e,
        0x3538, 0xffffffff, 0x700010ff,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x353c, 0xffffffff, 0x0,
        0x3538, 0xffffffff, 0x9ff,
        0x3500, 0xffffffff, 0x0,
        0x3504, 0xffffffff, 0x10000800,
        0x3504, 0xffffffff, 0xf,
        0x3504, 0xffffffff, 0xf,
        0x3500, 0xffffffff, 0x4,
        0x3504, 0xffffffff, 0x1000051e,
        0x3504, 0xffffffff, 0xffff,
        0x3504, 0xffffffff, 0xffff,
        0x3500, 0xffffffff, 0x8,
        0x3504, 0xffffffff, 0x80500,
        0x3500, 0xffffffff, 0x12,
        0x3504, 0xffffffff, 0x9050c,
        0x3500, 0xffffffff, 0x1d,
        0x3504, 0xffffffff, 0xb052c,
        0x3500, 0xffffffff, 0x2a,
        0x3504, 0xffffffff, 0x1053e,
        0x3500, 0xffffffff, 0x2d,
        0x3504, 0xffffffff, 0x10546,
        0x3500, 0xffffffff, 0x30,
        0x3504, 0xffffffff, 0xa054e,
        0x3500, 0xffffffff, 0x3c,
        0x3504, 0xffffffff, 0x1055f,
        0x3500, 0xffffffff, 0x3f,
        0x3504, 0xffffffff, 0x10567,
        0x3500, 0xffffffff, 0x42,
        0x3504, 0xffffffff, 0x1056f,
        0x3500, 0xffffffff, 0x45,
        0x3504, 0xffffffff, 0x10572,
        0x3500, 0xffffffff, 0x48,
        0x3504, 0xffffffff, 0x20575,
        0x3500, 0xffffffff, 0x4c,
        0x3504, 0xffffffff, 0x190801,
        0x3500, 0xffffffff, 0x67,
        0x3504, 0xffffffff, 0x1082a,
        0x3500, 0xffffffff, 0x6a,
        0x3504, 0xffffffff, 0x1b082d,
        0x3500, 0xffffffff, 0x87,
        0x3504, 0xffffffff, 0x310851,
        0x3500, 0xffffffff, 0xba,
        0x3504, 0xffffffff, 0x891,
        0x3500, 0xffffffff, 0xbc,
        0x3504, 0xffffffff, 0x893,
        0x3500, 0xffffffff, 0xbe,
        0x3504, 0xffffffff, 0x20895,
        0x3500, 0xffffffff, 0xc2,
        0x3504, 0xffffffff, 0x20899,
        0x3500, 0xffffffff, 0xc6,
        0x3504, 0xffffffff, 0x2089d,
        0x3500, 0xffffffff, 0xca,
        0x3504, 0xffffffff, 0x8a1,
        0x3500, 0xffffffff, 0xcc,
        0x3504, 0xffffffff, 0x8a3,
        0x3500, 0xffffffff, 0xce,
        0x3504, 0xffffffff, 0x308a5,
        0x3500, 0xffffffff, 0xd3,
        0x3504, 0xffffffff, 0x6d08cd,
        0x3500, 0xffffffff, 0x142,
        0x3504, 0xffffffff, 0x2000095a,
        0x3504, 0xffffffff, 0x1,
        0x3500, 0xffffffff, 0x144,
        0x3504, 0xffffffff, 0x301f095b,
        0x3500, 0xffffffff, 0x165,
        0x3504, 0xffffffff, 0xc094d,
        0x3500, 0xffffffff, 0x173,
        0x3504, 0xffffffff, 0xf096d,
        0x3500, 0xffffffff, 0x184,
        0x3504, 0xffffffff, 0x15097f,
        0x3500, 0xffffffff, 0x19b,
        0x3504, 0xffffffff, 0xc0998,
        0x3500, 0xffffffff, 0x1a9,
        0x3504, 0xffffffff, 0x409a7,
        0x3500, 0xffffffff, 0x1af,
        0x3504, 0xffffffff, 0xcdc,
        0x3500, 0xffffffff, 0x1b1,
        0x3504, 0xffffffff, 0x800,
        0x3508, 0xffffffff, 0x6c9b2000,
        0x3510, 0xfc00, 0x2000,
        0x3544, 0xffffffff, 0xfc0,
        0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
        switch (rdev->family) {
        case CHIP_TAHITI:
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_registers,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 tahiti_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 tahiti_golden_registers2,
                                                 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
                break;
        case CHIP_PITCAIRN:
                radeon_program_register_sequence(rdev,
                                                 pitcairn_golden_registers,
                                                 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 pitcairn_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 pitcairn_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
                break;
        case CHIP_VERDE:
                radeon_program_register_sequence(rdev,
                                                 verde_golden_registers,
                                                 (const u32)ARRAY_SIZE(verde_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 verde_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 verde_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
                radeon_program_register_sequence(rdev,
                                                 verde_pg_init,
                                                 (const u32)ARRAY_SIZE(verde_pg_init));
                break;
        case CHIP_OLAND:
                radeon_program_register_sequence(rdev,
                                                 oland_golden_registers,
                                                 (const u32)ARRAY_SIZE(oland_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 oland_golden_rlc_registers,
                                                 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
                radeon_program_register_sequence(rdev,
                                                 oland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
                break;
        case CHIP_HAINAN:
                radeon_program_register_sequence(rdev,
                                                 hainan_golden_registers,
                                                 (const u32)ARRAY_SIZE(hainan_golden_registers));
                radeon_program_register_sequence(rdev,
                                                 hainan_golden_registers2,
                                                 (const u32)ARRAY_SIZE(hainan_golden_registers2));
                radeon_program_register_sequence(rdev,
                                                 hainan_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
                break;
        default:
                break;
        }
}

#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
        u32 reference_clock = rdev->clock.spll.reference_freq;
        u32 tmp;

        tmp = RREG32(CG_CLKPIN_CNTL_2);
        if (tmp & MUX_TCLK_TO_XCLK)
                return TCLK;

        tmp = RREG32(CG_CLKPIN_CNTL);
        if (tmp & XTALIN_DIVIDE)
                return reference_clock / 4;

        return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
        u32 temp;
        int actual_temp = 0;

        temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
                CTF_TEMP_SHIFT;

        if (temp & 0x200)
                actual_temp = 255;
        else
                actual_temp = temp & 0x1ff;

        actual_temp = (actual_temp * 1000);

        return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

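/*
 * Per-ASIC MC (memory controller) IO register tables: {index, value} pairs
 * applied as part of the MC firmware load; the consuming code lies further
 * down in this file.
 */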
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
1482         {0x0000007d, 0xe8000000},
1483         {0x0000007e, 0x044408a8},
1484         {0x0000007f, 0x00000003},
1485         {0x00000080, 0x00000000},
1486         {0x00000081, 0x01000000},
1487         {0x00000082, 0x02000000},
1488         {0x00000083, 0x00000000},
1489         {0x00000084, 0xe3f3e4f4},
1490         {0x00000085, 0x00052024},
1491         {0x00000087, 0x00000000},
1492         {0x00000088, 0x66036603},
1493         {0x00000089, 0x01000000},
1494         {0x0000008b, 0x1c0a0000},
1495         {0x0000008c, 0xff010000},
1496         {0x0000008e, 0xffffefff},
1497         {0x0000008f, 0xfff3efff},
1498         {0x00000090, 0xfff3efbf},
1499         {0x00000094, 0x00101101},
1500         {0x00000095, 0x00000fff},
1501         {0x00000096, 0x00116fff},
1502         {0x00000097, 0x60010000},
1503         {0x00000098, 0x10010000},
1504         {0x00000099, 0x00006000},
1505         {0x0000009a, 0x00001000},
1506         {0x0000009f, 0x00a07730}
1507 };
1508
1509 /* ucode loading */
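     /*
      * New-style firmware carries the IO debug register pairs and ucode in its
      * header, so regs_size/ucode_size come from the header; old-style images
      * fall back to the per-ASIC tables above.  The load sequence below resets
      * the MC sequencer and makes it writable, programs the IO debug index/data
      * pairs, streams the ucode words into MC_SEQ_SUP_PGM, puts the sequencer
      * back into the active state, and then polls MC_SEQ_TRAIN_WAKEUP_CNTL for
      * TRAIN_DONE on both channels, bounded by rdev->usec_timeout.
      */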
1510 int si_mc_load_microcode(struct radeon_device *rdev)
1511 {
1512         const __be32 *fw_data = NULL;
1513         const __le32 *new_fw_data = NULL;
1514         u32 running;
1515         u32 *io_mc_regs = NULL;
1516         const __le32 *new_io_mc_regs = NULL;
1517         int i, regs_size, ucode_size;
1518
1519         if (!rdev->mc_fw)
1520                 return -EINVAL;
1521
1522         if (rdev->new_fw) {
1523                 const struct mc_firmware_header_v1_0 *hdr =
1524                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1525
1526                 radeon_ucode_print_mc_hdr(&hdr->header);
1527                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1528                 new_io_mc_regs = (const __le32 *)
1529                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1530                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1531                 new_fw_data = (const __le32 *)
1532                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1533         } else {
1534                 ucode_size = rdev->mc_fw->size / 4;
1535
1536                 switch (rdev->family) {
1537                 case CHIP_TAHITI:
1538                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1539                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1540                         break;
1541                 case CHIP_PITCAIRN:
1542                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1543                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1544                         break;
1545                 case CHIP_VERDE:
1546                 default:
1547                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1548                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1549                         break;
1550                 case CHIP_OLAND:
1551                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1552                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1553                         break;
1554                 case CHIP_HAINAN:
1555                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1556                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1557                         break;
1558                 }
1559                 fw_data = (const __be32 *)rdev->mc_fw->data;
1560         }
1561
1562         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1563
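             /* Only (re)load the MC ucode when the sequencer is not already running. */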
1564         if (running == 0) {
1570                 /* reset the engine and set to writable */
1571                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1572                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1573
1574                 /* load mc io regs */
1575                 for (i = 0; i < regs_size; i++) {
1576                         if (rdev->new_fw) {
1577                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1578                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1579                         } else {
1580                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1581                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1582                         }
1583                 }
1584                 /* load the MC ucode */
1585                 for (i = 0; i < ucode_size; i++) {
1586                         if (rdev->new_fw)
1587                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1588                         else
1589                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1590                 }
1591
1592                 /* put the engine back into the active state */
1593                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1594                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1595                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1596
1597                 /* wait for training to complete */
1598                 for (i = 0; i < rdev->usec_timeout; i++) {
1599                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1600                                 break;
1601                         udelay(1);
1602                 }
1603                 for (i = 0; i < rdev->usec_timeout; i++) {
1604                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1605                                 break;
1606                         udelay(1);
1607                 }
1611         }
1612
1613         return 0;
1614 }
1615
1616 static int si_init_microcode(struct radeon_device *rdev)
1617 {
1618         const char *chip_name;
1619         const char *new_chip_name;
1620         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1621         size_t smc_req_size, mc2_req_size;
1622         char fw_name[30];
1623         int err;
1624         int new_fw = 0;
1625
1626         DRM_DEBUG("\n");
1627
1628         switch (rdev->family) {
1629         case CHIP_TAHITI:
1630                 chip_name = "TAHITI";
1631                 new_chip_name = "tahiti";
1632                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1633                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1634                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1635                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1636                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1637                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1638                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1639                 break;
1640         case CHIP_PITCAIRN:
1641                 chip_name = "PITCAIRN";
1642                 new_chip_name = "pitcairn";
1643                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1644                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1645                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1646                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1647                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1648                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1649                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1650                 break;
1651         case CHIP_VERDE:
1652                 chip_name = "VERDE";
1653                 new_chip_name = "verde";
1654                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1655                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1656                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1657                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1658                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1659                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1660                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1661                 break;
1662         case CHIP_OLAND:
1663                 chip_name = "OLAND";
1664                 new_chip_name = "oland";
1665                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1666                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1667                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1668                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1669                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1670                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1671                 break;
1672         case CHIP_HAINAN:
1673                 chip_name = "HAINAN";
1674                 new_chip_name = "hainan";
1675                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1676                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1677                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1678                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1679                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1680                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1681                 break;
1682         default: BUG();
1683         }
1684
1685         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1686
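             /* For each block, try the new-style lower-case firmware name first and
              * fall back to the legacy upper-case image, which is only sanity-checked
              * by size.  New-style images are validated with radeon_ucode_validate()
              * and counted in new_fw so that mixing old and new images is rejected
              * at the end of this function.
              */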
1687         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1688         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1689         if (err) {
1690                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1691                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1692                 if (err)
1693                         goto out;
1694                 if (rdev->pfp_fw->size != pfp_req_size) {
1695                         printk(KERN_ERR
1696                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1697                                rdev->pfp_fw->size, fw_name);
1698                         err = -EINVAL;
1699                         goto out;
1700                 }
1701         } else {
1702                 err = radeon_ucode_validate(rdev->pfp_fw);
1703                 if (err) {
1704                         printk(KERN_ERR
1705                                "si_cp: validation failed for firmware \"%s\"\n",
1706                                fw_name);
1707                         goto out;
1708                 } else {
1709                         new_fw++;
1710                 }
1711         }
1712
1713         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1714         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1715         if (err) {
1716                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1717                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1718                 if (err)
1719                         goto out;
1720                 if (rdev->me_fw->size != me_req_size) {
1721                         printk(KERN_ERR
1722                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1723                                rdev->me_fw->size, fw_name);
1724                         err = -EINVAL;
                             goto out;
1725                 }
1726         } else {
1727                 err = radeon_ucode_validate(rdev->me_fw);
1728                 if (err) {
1729                         printk(KERN_ERR
1730                                "si_cp: validation failed for firmware \"%s\"\n",
1731                                fw_name);
1732                         goto out;
1733                 } else {
1734                         new_fw++;
1735                 }
1736         }
1737
1738         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1739         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1740         if (err) {
1741                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1742                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1743                 if (err)
1744                         goto out;
1745                 if (rdev->ce_fw->size != ce_req_size) {
1746                         printk(KERN_ERR
1747                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1748                                rdev->ce_fw->size, fw_name);
1749                         err = -EINVAL;
                             goto out;
1750                 }
1751         } else {
1752                 err = radeon_ucode_validate(rdev->ce_fw);
1753                 if (err) {
1754                         printk(KERN_ERR
1755                                "si_cp: validation failed for firmware \"%s\"\n",
1756                                fw_name);
1757                         goto out;
1758                 } else {
1759                         new_fw++;
1760                 }
1761         }
1762
1763         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1764         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1765         if (err) {
1766                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1767                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1768                 if (err)
1769                         goto out;
1770                 if (rdev->rlc_fw->size != rlc_req_size) {
1771                         printk(KERN_ERR
1772                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1773                                rdev->rlc_fw->size, fw_name);
1774                         err = -EINVAL;
                             goto out;
1775                 }
1776         } else {
1777                 err = radeon_ucode_validate(rdev->rlc_fw);
1778                 if (err) {
1779                         printk(KERN_ERR
1780                                "si_cp: validation failed for firmware \"%s\"\n",
1781                                fw_name);
1782                         goto out;
1783                 } else {
1784                         new_fw++;
1785                 }
1786         }
1787
1788         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1789         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1790         if (err) {
1791                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1792                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1793                 if (err) {
1794                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1795                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1796                         if (err)
1797                                 goto out;
1798                 }
1799                 if ((rdev->mc_fw->size != mc_req_size) &&
1800                     (rdev->mc_fw->size != mc2_req_size)) {
1801                         printk(KERN_ERR
1802                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1803                                rdev->mc_fw->size, fw_name);
1804                         err = -EINVAL;
                             goto out;
1805                 }
1806                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1807         } else {
1808                 err = radeon_ucode_validate(rdev->mc_fw);
1809                 if (err) {
1810                         printk(KERN_ERR
1811                                "si_cp: validation failed for firmware \"%s\"\n",
1812                                fw_name);
1813                         goto out;
1814                 } else {
1815                         new_fw++;
1816                 }
1817         }
1818
1819         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1820         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1821         if (err) {
1822                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1823                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1824                 if (err) {
1825                         printk(KERN_ERR
1826                                "smc: error loading firmware \"%s\"\n",
1827                                fw_name);
1828                         release_firmware(rdev->smc_fw);
1829                         rdev->smc_fw = NULL;
1830                         err = 0;
1831                 } else if (rdev->smc_fw->size != smc_req_size) {
1832                         printk(KERN_ERR
1833                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1834                                rdev->smc_fw->size, fw_name);
1835                         err = -EINVAL;
1836                 }
1837         } else {
1838                 err = radeon_ucode_validate(rdev->smc_fw);
1839                 if (err) {
1840                         printk(KERN_ERR
1841                                "si_cp: validation failed for firmware \"%s\"\n",
1842                                fw_name);
1843                         goto out;
1844                 } else {
1845                         new_fw++;
1846                 }
1847         }
1848
1849         if (new_fw == 0) {
1850                 rdev->new_fw = false;
1851         } else if (new_fw < 6) {
1852                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1853                 err = -EINVAL;
1854         } else {
1855                 rdev->new_fw = true;
1856         }
1857 out:
1858         if (err) {
1859                 if (err != -EINVAL)
1860                         printk(KERN_ERR
1861                                "si_cp: Failed to load firmware \"%s\"\n",
1862                                fw_name);
1863                 release_firmware(rdev->pfp_fw);
1864                 rdev->pfp_fw = NULL;
1865                 release_firmware(rdev->me_fw);
1866                 rdev->me_fw = NULL;
1867                 release_firmware(rdev->ce_fw);
1868                 rdev->ce_fw = NULL;
1869                 release_firmware(rdev->rlc_fw);
1870                 rdev->rlc_fw = NULL;
1871                 release_firmware(rdev->mc_fw);
1872                 rdev->mc_fw = NULL;
1873                 release_firmware(rdev->smc_fw);
1874                 rdev->smc_fw = NULL;
1875         }
1876         return err;
1877 }
1878
1879 /* watermark setup */
1880 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1881                                    struct radeon_crtc *radeon_crtc,
1882                                    struct drm_display_mode *mode,
1883                                    struct drm_display_mode *other_mode)
1884 {
1885         u32 tmp, buffer_alloc, i;
1886         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1887         /*
1888          * Line Buffer Setup
1889          * There are 3 line buffers, each one shared by 2 display controllers.
1890          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1891          * the display controllers.  The partitioning is done via one of four
1892          * preset allocations specified in bits 21:20 (only the two used here are listed):
1893          *  0 - half lb
1894          *  2 - whole lb, other crtc must be disabled
1895          */
1896         /* this can get tricky if we have two large displays on a paired group
1897          * of crtcs.  Ideally for multiple large displays we'd assign them to
1898          * non-linked crtcs for maximum line buffer allocation.
1899          */
1900         if (radeon_crtc->base.enabled && mode) {
1901                 if (other_mode) {
1902                         tmp = 0; /* 1/2 */
1903                         buffer_alloc = 1;
1904                 } else {
1905                         tmp = 2; /* whole */
1906                         buffer_alloc = 2;
1907                 }
1908         } else {
1909                 tmp = 0;
1910                 buffer_alloc = 0;
1911         }
1912
1913         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1914                DC_LB_MEMORY_CONFIG(tmp));
1915
1916         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1917                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1918         for (i = 0; i < rdev->usec_timeout; i++) {
1919                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1920                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1921                         break;
1922                 udelay(1);
1923         }
1924
1925         if (radeon_crtc->base.enabled && mode) {
1926                 switch (tmp) {
1927                 case 0:
1928                 default:
1929                         return 4096 * 2;
1930                 case 2:
1931                         return 8192 * 2;
1932                 }
1933         }
1934
1935         /* controller not enabled, so no lb used */
1936         return 0;
1937 }
1938
1939 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1940 {
1941         u32 tmp = RREG32(MC_SHARED_CHMAP);
1942
1943         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1944         case 0:
1945         default:
1946                 return 1;
1947         case 1:
1948                 return 2;
1949         case 2:
1950                 return 4;
1951         case 3:
1952                 return 8;
1953         case 4:
1954                 return 3;
1955         case 5:
1956                 return 6;
1957         case 6:
1958                 return 10;
1959         case 7:
1960                 return 12;
1961         case 8:
1962                 return 16;
1963         }
1964 }
1965
1966 struct dce6_wm_params {
1967         u32 dram_channels; /* number of dram channels */
1968         u32 yclk;          /* bandwidth per dram data pin in kHz */
1969         u32 sclk;          /* engine clock in kHz */
1970         u32 disp_clk;      /* display clock in kHz */
1971         u32 src_width;     /* viewport width */
1972         u32 active_time;   /* active display time in ns */
1973         u32 blank_time;    /* blank time in ns */
1974         bool interlaced;    /* mode is interlaced */
1975         fixed20_12 vsc;    /* vertical scale ratio */
1976         u32 num_heads;     /* number of active crtcs */
1977         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1978         u32 lb_size;       /* line buffer allocated to pipe */
1979         u32 vtaps;         /* vertical scaler taps */
1980 };
1981
1982 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1983 {
1984         /* Calculate raw DRAM Bandwidth */
1985         fixed20_12 dram_efficiency; /* 0.7 */
1986         fixed20_12 yclk, dram_channels, bandwidth;
1987         fixed20_12 a;
1988
1989         a.full = dfixed_const(1000);
1990         yclk.full = dfixed_const(wm->yclk);
1991         yclk.full = dfixed_div(yclk, a);
1992         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1993         a.full = dfixed_const(10);
1994         dram_efficiency.full = dfixed_const(7);
1995         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1996         bandwidth.full = dfixed_mul(dram_channels, yclk);
1997         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1998
1999         return dfixed_trunc(bandwidth);
2000 }
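     /*
      * Illustrative example with assumed numbers: for 2 DRAM channels (treated
      * as 4 bytes each, hence the "* 4") and yclk = 2,000,000 kHz, this yields
      * (2000) * (2 * 4) * 0.7 = 11200, i.e. roughly 11.2 GB/s of raw bandwidth.
      */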
2001
2002 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2003 {
2004         /* Calculate DRAM Bandwidth and the part allocated to display. */
2005         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2006         fixed20_12 yclk, dram_channels, bandwidth;
2007         fixed20_12 a;
2008
2009         a.full = dfixed_const(1000);
2010         yclk.full = dfixed_const(wm->yclk);
2011         yclk.full = dfixed_div(yclk, a);
2012         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2013         a.full = dfixed_const(10);
2014         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2015         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2016         bandwidth.full = dfixed_mul(dram_channels, yclk);
2017         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2018
2019         return dfixed_trunc(bandwidth);
2020 }
2021
2022 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2023 {
2024         /* Calculate the display Data return Bandwidth */
2025         fixed20_12 return_efficiency; /* 0.8 */
2026         fixed20_12 sclk, bandwidth;
2027         fixed20_12 a;
2028
2029         a.full = dfixed_const(1000);
2030         sclk.full = dfixed_const(wm->sclk);
2031         sclk.full = dfixed_div(sclk, a);
2032         a.full = dfixed_const(10);
2033         return_efficiency.full = dfixed_const(8);
2034         return_efficiency.full = dfixed_div(return_efficiency, a);
2035         a.full = dfixed_const(32);
2036         bandwidth.full = dfixed_mul(a, sclk);
2037         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2038
2039         return dfixed_trunc(bandwidth);
2040 }
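     /*
      * Illustrative example with an assumed sclk of 800,000 kHz:
      * (800) * 32 * 0.8 = 20480, i.e. roughly 20.5 GB/s of return bandwidth.
      */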
2041
2042 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2043 {
2044         return 32;
2045 }
2046
2047 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2048 {
2049         /* Calculate the DMIF Request Bandwidth */
2050         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2051         fixed20_12 disp_clk, sclk, bandwidth;
2052         fixed20_12 a, b1, b2;
2053         u32 min_bandwidth;
2054
2055         a.full = dfixed_const(1000);
2056         disp_clk.full = dfixed_const(wm->disp_clk);
2057         disp_clk.full = dfixed_div(disp_clk, a);
2058         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2059         b1.full = dfixed_mul(a, disp_clk);
2060
2061         a.full = dfixed_const(1000);
2062         sclk.full = dfixed_const(wm->sclk);
2063         sclk.full = dfixed_div(sclk, a);
2064         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2065         b2.full = dfixed_mul(a, sclk);
2066
2067         a.full = dfixed_const(10);
2068         disp_clk_request_efficiency.full = dfixed_const(8);
2069         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2070
2071         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2072
2073         a.full = dfixed_const(min_bandwidth);
2074         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2075
2076         return dfixed_trunc(bandwidth);
2077 }
2078
2079 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2080 {
2081         /* Calculate the available bandwidth. The display can use this temporarily but not on average. */
2082         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2083         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2084         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2085
2086         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2087 }
2088
2089 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2090 {
2091         /* Calculate the display mode Average Bandwidth
2092          * DisplayMode should contain the source and destination dimensions,
2093          * timing, etc.
2094          */
2095         fixed20_12 bpp;
2096         fixed20_12 line_time;
2097         fixed20_12 src_width;
2098         fixed20_12 bandwidth;
2099         fixed20_12 a;
2100
2101         a.full = dfixed_const(1000);
2102         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2103         line_time.full = dfixed_div(line_time, a);
2104         bpp.full = dfixed_const(wm->bytes_per_pixel);
2105         src_width.full = dfixed_const(wm->src_width);
2106         bandwidth.full = dfixed_mul(src_width, bpp);
2107         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2108         bandwidth.full = dfixed_div(bandwidth, line_time);
2109
2110         return dfixed_trunc(bandwidth);
2111 }
2112
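     /*
      * The watermark computed below is the worst-case latency (in ns) the pipe
      * must tolerate: memory latency plus the chunk/cursor return time of the
      * other heads plus the DC pipe latency, extended by any extra time needed
      * when the line buffer cannot be refilled within the active display period.
      */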
2113 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2114 {
2115         /* First calculate the latency in ns */
2116         u32 mc_latency = 2000; /* 2000 ns. */
2117         u32 available_bandwidth = dce6_available_bandwidth(wm);
2118         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2119         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2120         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2121         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2122                 (wm->num_heads * cursor_line_pair_return_time);
2123         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2124         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2125         u32 tmp, dmif_size = 12288;
2126         fixed20_12 a, b, c;
2127
2128         if (wm->num_heads == 0)
2129                 return 0;
2130
2131         a.full = dfixed_const(2);
2132         b.full = dfixed_const(1);
2133         if ((wm->vsc.full > a.full) ||
2134             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2135             (wm->vtaps >= 5) ||
2136             ((wm->vsc.full >= a.full) && wm->interlaced))
2137                 max_src_lines_per_dst_line = 4;
2138         else
2139                 max_src_lines_per_dst_line = 2;
2140
2141         a.full = dfixed_const(available_bandwidth);
2142         b.full = dfixed_const(wm->num_heads);
2143         a.full = dfixed_div(a, b);
2144
2145         b.full = dfixed_const(mc_latency + 512);
2146         c.full = dfixed_const(wm->disp_clk);
2147         b.full = dfixed_div(b, c);
2148
2149         c.full = dfixed_const(dmif_size);
2150         b.full = dfixed_div(c, b);
2151
2152         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2153
2154         b.full = dfixed_const(1000);
2155         c.full = dfixed_const(wm->disp_clk);
2156         b.full = dfixed_div(c, b);
2157         c.full = dfixed_const(wm->bytes_per_pixel);
2158         b.full = dfixed_mul(b, c);
2159
2160         lb_fill_bw = min(tmp, dfixed_trunc(b));
2161
2162         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2163         b.full = dfixed_const(1000);
2164         c.full = dfixed_const(lb_fill_bw);
2165         b.full = dfixed_div(c, b);
2166         a.full = dfixed_div(a, b);
2167         line_fill_time = dfixed_trunc(a);
2168
2169         if (line_fill_time < wm->active_time)
2170                 return latency;
2171         else
2172                 return latency + (line_fill_time - wm->active_time);
2173
2174 }
2175
2176 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2177 {
2178         if (dce6_average_bandwidth(wm) <=
2179             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2180                 return true;
2181         else
2182                 return false;
2183 }
2184
2185 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2186 {
2187         if (dce6_average_bandwidth(wm) <=
2188             (dce6_available_bandwidth(wm) / wm->num_heads))
2189                 return true;
2190         else
2191                 return false;
2192 }
2193
2194 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2195 {
2196         u32 lb_partitions = wm->lb_size / wm->src_width;
2197         u32 line_time = wm->active_time + wm->blank_time;
2198         u32 latency_tolerant_lines;
2199         u32 latency_hiding;
2200         fixed20_12 a;
2201
2202         a.full = dfixed_const(1);
2203         if (wm->vsc.full > a.full)
2204                 latency_tolerant_lines = 1;
2205         else {
2206                 if (lb_partitions <= (wm->vtaps + 1))
2207                         latency_tolerant_lines = 1;
2208                 else
2209                         latency_tolerant_lines = 2;
2210         }
2211
2212         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2213
2214         if (dce6_latency_watermark(wm) <= latency_hiding)
2215                 return true;
2216         else
2217                 return false;
2218 }
2219
2220 static void dce6_program_watermarks(struct radeon_device *rdev,
2221                                          struct radeon_crtc *radeon_crtc,
2222                                          u32 lb_size, u32 num_heads)
2223 {
2224         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2225         struct dce6_wm_params wm_low, wm_high;
2226         u32 dram_channels;
2227         u32 pixel_period;
2228         u32 line_time = 0;
2229         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2230         u32 priority_a_mark = 0, priority_b_mark = 0;
2231         u32 priority_a_cnt = PRIORITY_OFF;
2232         u32 priority_b_cnt = PRIORITY_OFF;
2233         u32 tmp, arb_control3;
2234         fixed20_12 a, b, c;
2235
2236         if (radeon_crtc->base.enabled && num_heads && mode) {
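                     /* mode->clock is in kHz, so pixel_period below is the pixel
                      * period in ns and line_time the scanline period in ns,
                      * clamped to 65535 to fit the watermark field.
                      */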
2237                 pixel_period = 1000000 / (u32)mode->clock;
2238                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2239                 priority_a_cnt = 0;
2240                 priority_b_cnt = 0;
2241
2242                 if (rdev->family == CHIP_ARUBA)
2243                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2244                 else
2245                         dram_channels = si_get_number_of_dram_channels(rdev);
2246
2247                 /* watermark for high clocks */
2248                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2249                         wm_high.yclk =
2250                                 radeon_dpm_get_mclk(rdev, false) * 10;
2251                         wm_high.sclk =
2252                                 radeon_dpm_get_sclk(rdev, false) * 10;
2253                 } else {
2254                         wm_high.yclk = rdev->pm.current_mclk * 10;
2255                         wm_high.sclk = rdev->pm.current_sclk * 10;
2256                 }
2257
2258                 wm_high.disp_clk = mode->clock;
2259                 wm_high.src_width = mode->crtc_hdisplay;
2260                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2261                 wm_high.blank_time = line_time - wm_high.active_time;
2262                 wm_high.interlaced = false;
2263                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2264                         wm_high.interlaced = true;
2265                 wm_high.vsc = radeon_crtc->vsc;
2266                 wm_high.vtaps = 1;
2267                 if (radeon_crtc->rmx_type != RMX_OFF)
2268                         wm_high.vtaps = 2;
2269                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2270                 wm_high.lb_size = lb_size;
2271                 wm_high.dram_channels = dram_channels;
2272                 wm_high.num_heads = num_heads;
2273
2274                 /* watermark for low clocks */
2275                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2276                         wm_low.yclk =
2277                                 radeon_dpm_get_mclk(rdev, true) * 10;
2278                         wm_low.sclk =
2279                                 radeon_dpm_get_sclk(rdev, true) * 10;
2280                 } else {
2281                         wm_low.yclk = rdev->pm.current_mclk * 10;
2282                         wm_low.sclk = rdev->pm.current_sclk * 10;
2283                 }
2284
2285                 wm_low.disp_clk = mode->clock;
2286                 wm_low.src_width = mode->crtc_hdisplay;
2287                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2288                 wm_low.blank_time = line_time - wm_low.active_time;
2289                 wm_low.interlaced = false;
2290                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2291                         wm_low.interlaced = true;
2292                 wm_low.vsc = radeon_crtc->vsc;
2293                 wm_low.vtaps = 1;
2294                 if (radeon_crtc->rmx_type != RMX_OFF)
2295                         wm_low.vtaps = 2;
2296                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2297                 wm_low.lb_size = lb_size;
2298                 wm_low.dram_channels = dram_channels;
2299                 wm_low.num_heads = num_heads;
2300
2301                 /* set for high clocks */
2302                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2303                 /* set for low clocks */
2304                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2305
2306                 /* possibly force display priority to high */
2307                 /* should really do this at mode validation time... */
2308                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2309                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2310                     !dce6_check_latency_hiding(&wm_high) ||
2311                     (rdev->disp_priority == 2)) {
2312                         DRM_DEBUG_KMS("force priority to high\n");
2313                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2314                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2315                 }
2316                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2317                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2318                     !dce6_check_latency_hiding(&wm_low) ||
2319                     (rdev->disp_priority == 2)) {
2320                         DRM_DEBUG_KMS("force priority to high\n");
2321                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2322                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2323                 }
2324
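                     /* Convert the latency watermarks (ns) into priority marks:
                      * latency * pixel clock (MHz) * hsc / 1000 / 16, i.e. roughly
                      * the number of pixels scanned out during that latency, in
                      * units of 16 pixels, scaled by the horizontal scale ratio.
                      */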
2325                 a.full = dfixed_const(1000);
2326                 b.full = dfixed_const(mode->clock);
2327                 b.full = dfixed_div(b, a);
2328                 c.full = dfixed_const(latency_watermark_a);
2329                 c.full = dfixed_mul(c, b);
2330                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2331                 c.full = dfixed_div(c, a);
2332                 a.full = dfixed_const(16);
2333                 c.full = dfixed_div(c, a);
2334                 priority_a_mark = dfixed_trunc(c);
2335                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2336
2337                 a.full = dfixed_const(1000);
2338                 b.full = dfixed_const(mode->clock);
2339                 b.full = dfixed_div(b, a);
2340                 c.full = dfixed_const(latency_watermark_b);
2341                 c.full = dfixed_mul(c, b);
2342                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2343                 c.full = dfixed_div(c, a);
2344                 a.full = dfixed_const(16);
2345                 c.full = dfixed_div(c, a);
2346                 priority_b_mark = dfixed_trunc(c);
2347                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2348         }
2349
2350         /* select wm A */
2351         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2352         tmp = arb_control3;
2353         tmp &= ~LATENCY_WATERMARK_MASK(3);
2354         tmp |= LATENCY_WATERMARK_MASK(1);
2355         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2356         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2357                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2358                 LATENCY_HIGH_WATERMARK(line_time)));
2359         /* select wm B */
2360         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2361         tmp &= ~LATENCY_WATERMARK_MASK(3);
2362         tmp |= LATENCY_WATERMARK_MASK(2);
2363         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2364         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2365                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2366                 LATENCY_HIGH_WATERMARK(line_time)));
2367         /* restore original selection */
2368         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2369
2370         /* write the priority marks */
2371         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2372         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2373
2374         /* save values for DPM */
2375         radeon_crtc->line_time = line_time;
2376         radeon_crtc->wm_high = latency_watermark_a;
2377         radeon_crtc->wm_low = latency_watermark_b;
2378 }
2379
2380 void dce6_bandwidth_update(struct radeon_device *rdev)
2381 {
2382         struct drm_display_mode *mode0 = NULL;
2383         struct drm_display_mode *mode1 = NULL;
2384         u32 num_heads = 0, lb_size;
2385         int i;
2386
2387         if (!rdev->mode_info.mode_config_initialized)
2388                 return;
2389
2390         radeon_update_display_priority(rdev);
2391
2392         for (i = 0; i < rdev->num_crtc; i++) {
2393                 if (rdev->mode_info.crtcs[i]->base.enabled)
2394                         num_heads++;
2395         }
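             /* Line buffers are shared between crtc pairs, so walk the crtcs two at
              * a time and pass each one's partner mode so the split can be chosen.
              */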
2396         for (i = 0; i < rdev->num_crtc; i += 2) {
2397                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2398                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2399                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2400                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2401                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2402                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2403         }
2404 }
2405
2406 /*
2407  * Core functions
2408  */
2409 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2410 {
2411         const u32 num_tile_mode_states = 32;
2412         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2413
2414         switch (rdev->config.si.mem_row_size_in_kb) {
2415         case 1:
2416                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2417                 break;
2418         case 2:
2419         default:
2420                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2421                 break;
2422         case 4:
2423                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2424                 break;
2425         }
2426
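             /* Tahiti and Pitcairn use the 8-pipe (P8_32x32_8x16) pipe config; each
              * of the 32 GB_TILE_MODE entries below packs array mode, micro-tile
              * mode, pipe config, tile split and bank geometry for one surface type.
              */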
2427         if ((rdev->family == CHIP_TAHITI) ||
2428             (rdev->family == CHIP_PITCAIRN)) {
2429                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2430                         switch (reg_offset) {
2431                         case 0:  /* non-AA compressed depth or any compressed stencil */
2432                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2434                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2435                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2436                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2437                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2440                                 break;
2441                         case 1:  /* 2xAA/4xAA compressed depth only */
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2445                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2446                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2447                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2450                                 break;
2451                         case 2:  /* 8xAA compressed depth only */
2452                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2454                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2455                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2456                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2457                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2460                                 break;
2461                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2462                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2465                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2466                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2467                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2469                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2470                                 break;
2471                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2472                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2473                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2476                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2477                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2480                                 break;
2481                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2485                                                  TILE_SPLIT(split_equal_to_row_size) |
2486                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2487                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2490                                 break;
2491                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2492                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2494                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495                                                  TILE_SPLIT(split_equal_to_row_size) |
2496                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2497                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2500                                 break;
2501                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2502                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505                                                  TILE_SPLIT(split_equal_to_row_size) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2507                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2510                                 break;
2511                         case 8:  /* 1D and 1D Array Surfaces */
2512                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2513                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2514                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2515                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2516                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2517                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2520                                 break;
2521                         case 9:  /* Displayable maps. */
2522                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2525                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2526                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2527                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2530                                 break;
2531                         case 10:  /* Display 8bpp. */
2532                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2534                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2536                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2537                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540                                 break;
2541                         case 11:  /* Display 16bpp. */
2542                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2546                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2547                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550                                 break;
2551                         case 12:  /* Display 32bpp. */
2552                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2556                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2557                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2560                                 break;
2561                         case 13:  /* Thin. */
2562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2564                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2565                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2566                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2567                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2569                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2570                                 break;
2571                         case 14:  /* Thin 8 bpp. */
2572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2574                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2575                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2576                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2577                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2580                                 break;
2581                         case 15:  /* Thin 16 bpp. */
2582                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2584                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2586                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2587                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2589                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2590                                 break;
2591                         case 16:  /* Thin 32 bpp. */
2592                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2594                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2596                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2597                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2599                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2600                                 break;
2601                         case 17:  /* Thin 64 bpp. */
2602                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2604                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2605                                                  TILE_SPLIT(split_equal_to_row_size) |
2606                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2607                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2609                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2610                                 break;
2611                         case 21:  /* 8 bpp PRT. */
2612                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2614                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2615                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2617                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2618                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2619                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2620                                 break;
2621                         case 22:  /* 16 bpp PRT */
2622                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2626                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2627                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2630                                 break;
2631                         case 23:  /* 32 bpp PRT */
2632                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2636                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2637                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640                                 break;
2641                         case 24:  /* 64 bpp PRT */
2642                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2645                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2646                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2647                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2650                                 break;
2651                         case 25:  /* 128 bpp PRT */
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2655                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2656                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2657                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2660                                 break;
2661                         default:
2662                                 gb_tile_moden = 0;
2663                                 break;
2664                         }
2665                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2666                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2667                 }
2668         } else if ((rdev->family == CHIP_VERDE) ||
2669                    (rdev->family == CHIP_OLAND) ||
2670                    (rdev->family == CHIP_HAINAN)) {
2671                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2672                         switch (reg_offset) {
2673                         case 0:  /* non-AA compressed depth or any compressed stencil */
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2678                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2679                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2682                                 break;
2683                         case 1:  /* 2xAA/4xAA compressed depth only */
2684                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2689                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2691                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2692                                 break;
2693                         case 2:  /* 8xAA compressed depth only */
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2696                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2698                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2699                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2701                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2702                                 break;
2703                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2704                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2706                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2708                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2709                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2712                                 break;
2713                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2714                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2716                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2719                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2722                                 break;
2723                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2724                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2726                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727                                                  TILE_SPLIT(split_equal_to_row_size) |
2728                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2729                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2731                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2732                                 break;
2733                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2734                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2736                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2737                                                  TILE_SPLIT(split_equal_to_row_size) |
2738                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2739                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2742                                 break;
2743                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2744                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2745                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2746                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747                                                  TILE_SPLIT(split_equal_to_row_size) |
2748                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2749                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2752                                 break;
2753                         case 8:  /* 1D and 1D Array Surfaces */
2754                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2755                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2756                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2759                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2761                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2762                                 break;
2763                         case 9:  /* Displayable maps. */
2764                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2765                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2768                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2769                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2770                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2771                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2772                                 break;
2773                         case 10:  /* Display 8bpp. */
2774                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2776                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2778                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2779                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2781                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2782                                 break;
2783                         case 11:  /* Display 16bpp. */
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2788                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2789                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2792                                 break;
2793                         case 12:  /* Display 32bpp. */
2794                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2796                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2798                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2799                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2802                                 break;
2803                         case 13:  /* Thin. */
2804                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2806                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2808                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2809                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2811                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2812                                 break;
2813                         case 14:  /* Thin 8 bpp. */
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2816                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2818                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2819                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2822                                 break;
2823                         case 15:  /* Thin 16 bpp. */
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2828                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2829                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2831                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2832                                 break;
2833                         case 16:  /* Thin 32 bpp. */
2834                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2835                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2836                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2837                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2838                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2839                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2842                                 break;
2843                         case 17:  /* Thin 64 bpp. */
2844                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2846                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2847                                                  TILE_SPLIT(split_equal_to_row_size) |
2848                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2849                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2852                                 break;
2853                         case 21:  /* 8 bpp PRT. */
2854                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2855                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2856                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2857                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2858                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2859                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2862                                 break;
2863                         case 22:  /* 16 bpp PRT */
2864                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2867                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2868                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2869                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2871                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2872                                 break;
2873                         case 23:  /* 32 bpp PRT */
2874                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2877                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2878                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2879                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2881                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882                                 break;
2883                         case 24:  /* 64 bpp PRT */
2884                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2886                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2887                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2888                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2889                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2892                                 break;
2893                         case 25:  /* 128 bpp PRT */
2894                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2896                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2897                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2898                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2899                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2902                                 break;
2903                         default:
2904                                 gb_tile_moden = 0;
2905                                 break;
2906                         }
2907                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2908                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2909                 }
2910         } else
2911                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2912 }
2913
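/**
 * si_select_se_sh - select which SE, SH combinations to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Selects which SE/SH combination GRBM register accesses are routed
 * to.  Passing 0xffffffff for @se_num or @sh_num selects broadcast
 * writes to all SEs or SHs (SI).
 */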
2914 static void si_select_se_sh(struct radeon_device *rdev,
2915                             u32 se_num, u32 sh_num)
2916 {
2917         u32 data = INSTANCE_BROADCAST_WRITES;
2918
2919         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2920                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2921         else if (se_num == 0xffffffff)
2922                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2923         else if (sh_num == 0xffffffff)
2924                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2925         else
2926                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2927         WREG32(GRBM_GFX_INDEX, data);
2928 }
2929
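/**
 * si_create_bitmask - build a bitmask of a given width
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns a mask with the @bit_width least significant bits set (SI).
 */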
2930 static u32 si_create_bitmask(u32 bit_width)
2931 {
2932         u32 i, mask = 0;
2933
2934         for (i = 0; i < bit_width; i++) {
2935                 mask <<= 1;
2936                 mask |= 1;
2937         }
2938         return mask;
2939 }
2940
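/**
 * si_get_cu_enabled - get a bitmask of the enabled CUs
 *
 * @rdev: radeon_device pointer
 * @cu_per_sh: number of CUs per shader array
 *
 * Combines the fuse and user shader array config registers and
 * returns a bitmask of the compute units enabled in the currently
 * selected SE/SH (SI).
 */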
2941 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2942 {
2943         u32 data, mask;
2944
2945         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2946         if (data & 1)
2947                 data &= INACTIVE_CUS_MASK;
2948         else
2949                 data = 0;
2950         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2951
2952         data >>= INACTIVE_CUS_SHIFT;
2953
2954         mask = si_create_bitmask(cu_per_sh);
2955
2956         return ~data & mask;
2957 }
2958
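/**
 * si_setup_spi - configure SPI static thread management
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @cu_per_sh: number of CUs per SH for the asic
 *
 * For each SE/SH, clears the first active CU bit found in
 * SPI_STATIC_THREAD_MGMT_3 (SI).
 */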
2959 static void si_setup_spi(struct radeon_device *rdev,
2960                          u32 se_num, u32 sh_per_se,
2961                          u32 cu_per_sh)
2962 {
2963         int i, j, k;
2964         u32 data, mask, active_cu;
2965
2966         for (i = 0; i < se_num; i++) {
2967                 for (j = 0; j < sh_per_se; j++) {
2968                         si_select_se_sh(rdev, i, j);
2969                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2970                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2971
2972                         mask = 1;
2973                         for (k = 0; k < 16; k++) {
2974                                 mask = 1 << k; /* step through each CU bit in turn */
2975                                 if (active_cu & mask) {
2976                                         data &= ~mask;
2977                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2978                                         break;
2979                                 }
2980                         }
2981                 }
2982         }
2983         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2984 }
2985
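/**
 * si_get_rb_disabled - get a bitmask of the disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE
 * @sh_per_se: number of SH blocks per SE
 *
 * Combines the fuse and user render backend disable registers and
 * returns a bitmask of the RBs disabled in the currently selected
 * SE/SH (SI).
 */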
2986 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2987                               u32 max_rb_num_per_se,
2988                               u32 sh_per_se)
2989 {
2990         u32 data, mask;
2991
2992         data = RREG32(CC_RB_BACKEND_DISABLE);
2993         if (data & 1)
2994                 data &= BACKEND_DISABLE_MASK;
2995         else
2996                 data = 0;
2997         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2998
2999         data >>= BACKEND_DISABLE_SHIFT;
3000
3001         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3002
3003         return data & mask;
3004 }
3005
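/**
 * si_setup_rb - set up the render backends
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Builds the enabled RB bitmask, stores it in the SI config and
 * programs PA_SC_RASTER_CONFIG for each SE accordingly (SI).
 */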
3006 static void si_setup_rb(struct radeon_device *rdev,
3007                         u32 se_num, u32 sh_per_se,
3008                         u32 max_rb_num_per_se)
3009 {
3010         int i, j;
3011         u32 data, mask;
3012         u32 disabled_rbs = 0;
3013         u32 enabled_rbs = 0;
3014
3015         for (i = 0; i < se_num; i++) {
3016                 for (j = 0; j < sh_per_se; j++) {
3017                         si_select_se_sh(rdev, i, j);
3018                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3019                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3020                 }
3021         }
3022         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3023
3024         mask = 1;
3025         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3026                 if (!(disabled_rbs & mask))
3027                         enabled_rbs |= mask;
3028                 mask <<= 1;
3029         }
3030
3031         rdev->config.si.backend_enable_mask = enabled_rbs;
3032
3033         for (i = 0; i < se_num; i++) {
3034                 si_select_se_sh(rdev, i, 0xffffffff);
3035                 data = 0;
3036                 for (j = 0; j < sh_per_se; j++) {
3037                         switch (enabled_rbs & 3) {
3038                         case 1:
3039                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3040                                 break;
3041                         case 2:
3042                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3043                                 break;
3044                         case 3:
3045                         default:
3046                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3047                                 break;
3048                         }
3049                         enabled_rbs >>= 2;
3050                 }
3051                 WREG32(PA_SC_RASTER_CONFIG, data);
3052         }
3053         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3054 }
3055
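/**
 * si_gpu_init - set up the core GFX configuration
 *
 * @rdev: radeon_device pointer
 *
 * Fills in the per-family limits, initializes HDP and the tiling
 * configuration, sets up the RBs and SPI, and programs the HW
 * defaults for the 3D engine (SI).
 */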
3056 static void si_gpu_init(struct radeon_device *rdev)
3057 {
3058         u32 gb_addr_config = 0;
3059         u32 mc_shared_chmap, mc_arb_ramcfg;
3060         u32 sx_debug_1;
3061         u32 hdp_host_path_cntl;
3062         u32 tmp;
3063         int i, j;
3064
3065         switch (rdev->family) {
3066         case CHIP_TAHITI:
3067                 rdev->config.si.max_shader_engines = 2;
3068                 rdev->config.si.max_tile_pipes = 12;
3069                 rdev->config.si.max_cu_per_sh = 8;
3070                 rdev->config.si.max_sh_per_se = 2;
3071                 rdev->config.si.max_backends_per_se = 4;
3072                 rdev->config.si.max_texture_channel_caches = 12;
3073                 rdev->config.si.max_gprs = 256;
3074                 rdev->config.si.max_gs_threads = 32;
3075                 rdev->config.si.max_hw_contexts = 8;
3076
3077                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3078                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3079                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3080                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3081                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3082                 break;
3083         case CHIP_PITCAIRN:
3084                 rdev->config.si.max_shader_engines = 2;
3085                 rdev->config.si.max_tile_pipes = 8;
3086                 rdev->config.si.max_cu_per_sh = 5;
3087                 rdev->config.si.max_sh_per_se = 2;
3088                 rdev->config.si.max_backends_per_se = 4;
3089                 rdev->config.si.max_texture_channel_caches = 8;
3090                 rdev->config.si.max_gprs = 256;
3091                 rdev->config.si.max_gs_threads = 32;
3092                 rdev->config.si.max_hw_contexts = 8;
3093
3094                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3095                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3096                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3097                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3098                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3099                 break;
3100         case CHIP_VERDE:
3101         default:
3102                 rdev->config.si.max_shader_engines = 1;
3103                 rdev->config.si.max_tile_pipes = 4;
3104                 rdev->config.si.max_cu_per_sh = 5;
3105                 rdev->config.si.max_sh_per_se = 2;
3106                 rdev->config.si.max_backends_per_se = 4;
3107                 rdev->config.si.max_texture_channel_caches = 4;
3108                 rdev->config.si.max_gprs = 256;
3109                 rdev->config.si.max_gs_threads = 32;
3110                 rdev->config.si.max_hw_contexts = 8;
3111
3112                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3113                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3114                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3115                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3116                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3117                 break;
3118         case CHIP_OLAND:
3119                 rdev->config.si.max_shader_engines = 1;
3120                 rdev->config.si.max_tile_pipes = 4;
3121                 rdev->config.si.max_cu_per_sh = 6;
3122                 rdev->config.si.max_sh_per_se = 1;
3123                 rdev->config.si.max_backends_per_se = 2;
3124                 rdev->config.si.max_texture_channel_caches = 4;
3125                 rdev->config.si.max_gprs = 256;
3126                 rdev->config.si.max_gs_threads = 16;
3127                 rdev->config.si.max_hw_contexts = 8;
3128
3129                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3130                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3131                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3132                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3133                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3134                 break;
3135         case CHIP_HAINAN:
3136                 rdev->config.si.max_shader_engines = 1;
3137                 rdev->config.si.max_tile_pipes = 4;
3138                 rdev->config.si.max_cu_per_sh = 5;
3139                 rdev->config.si.max_sh_per_se = 1;
3140                 rdev->config.si.max_backends_per_se = 1;
3141                 rdev->config.si.max_texture_channel_caches = 2;
3142                 rdev->config.si.max_gprs = 256;
3143                 rdev->config.si.max_gs_threads = 16;
3144                 rdev->config.si.max_hw_contexts = 8;
3145
3146                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3147                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3148                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3149                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3150                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3151                 break;
3152         }
3153
3154         /* Initialize HDP */
3155         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3156                 WREG32((0x2c14 + j), 0x00000000);
3157                 WREG32((0x2c18 + j), 0x00000000);
3158                 WREG32((0x2c1c + j), 0x00000000);
3159                 WREG32((0x2c20 + j), 0x00000000);
3160                 WREG32((0x2c24 + j), 0x00000000);
3161         }
3162
3163         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3164
3165         evergreen_fix_pci_max_read_req_size(rdev);
3166
3167         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3168
3169         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3170         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3171
3172         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3173         rdev->config.si.mem_max_burst_length_bytes = 256;
3174         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3175         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3176         if (rdev->config.si.mem_row_size_in_kb > 4)
3177                 rdev->config.si.mem_row_size_in_kb = 4;
3178         /* XXX use MC settings? */
3179         rdev->config.si.shader_engine_tile_size = 32;
3180         rdev->config.si.num_gpus = 1;
3181         rdev->config.si.multi_gpu_tile_size = 64;
3182
3183         /* fix up row size */
3184         gb_addr_config &= ~ROW_SIZE_MASK;
3185         switch (rdev->config.si.mem_row_size_in_kb) {
3186         case 1:
3187         default:
3188                 gb_addr_config |= ROW_SIZE(0);
3189                 break;
3190         case 2:
3191                 gb_addr_config |= ROW_SIZE(1);
3192                 break;
3193         case 4:
3194                 gb_addr_config |= ROW_SIZE(2);
3195                 break;
3196         }
3197
3198         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3199          * not have bank info, so create a custom tiling dword.
3200          * bits 3:0   num_pipes
3201          * bits 7:4   num_banks
3202          * bits 11:8  group_size
3203          * bits 15:12 row_size
3204          */
3205         rdev->config.si.tile_config = 0;
3206         switch (rdev->config.si.num_tile_pipes) {
3207         case 1:
3208                 rdev->config.si.tile_config |= (0 << 0);
3209                 break;
3210         case 2:
3211                 rdev->config.si.tile_config |= (1 << 0);
3212                 break;
3213         case 4:
3214                 rdev->config.si.tile_config |= (2 << 0);
3215                 break;
3216         case 8:
3217         default:
3218                 /* XXX what about 12? */
3219                 rdev->config.si.tile_config |= (3 << 0);
3220                 break;
3221         }
3222         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3223         case 0: /* four banks */
3224                 rdev->config.si.tile_config |= 0 << 4;
3225                 break;
3226         case 1: /* eight banks */
3227                 rdev->config.si.tile_config |= 1 << 4;
3228                 break;
3229         case 2: /* sixteen banks */
3230         default:
3231                 rdev->config.si.tile_config |= 2 << 4;
3232                 break;
3233         }
3234         rdev->config.si.tile_config |=
3235                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3236         rdev->config.si.tile_config |=
3237                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3238
3239         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3240         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3241         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3242         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3243         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3244         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3245         if (rdev->has_uvd) {
3246                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3247                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3248                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3249         }
3250
3251         si_tiling_mode_table_init(rdev);
3252
3253         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3254                     rdev->config.si.max_sh_per_se,
3255                     rdev->config.si.max_backends_per_se);
3256
3257         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3258                      rdev->config.si.max_sh_per_se,
3259                      rdev->config.si.max_cu_per_sh);
3260
3261         rdev->config.si.active_cus = 0;
3262         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3263                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3264                         rdev->config.si.active_cus +=
3265                                 hweight32(si_get_cu_active_bitmap(rdev, i, j));
3266                 }
3267         }
3268
3269         /* set HW defaults for 3D engine */
3270         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3271                                      ROQ_IB2_START(0x2b)));
3272         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3273
3274         sx_debug_1 = RREG32(SX_DEBUG_1);
3275         WREG32(SX_DEBUG_1, sx_debug_1);
3276
3277         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3278
3279         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3280                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3281                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3282                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3283
3284         WREG32(VGT_NUM_INSTANCES, 1);
3285
3286         WREG32(CP_PERFMON_CNTL, 0);
3287
3288         WREG32(SQ_CONFIG, 0);
3289
3290         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3291                                           FORCE_EOV_MAX_REZ_CNT(255)));
3292
3293         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3294                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3295
3296         WREG32(VGT_GS_VERTEX_REUSE, 16);
3297         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3298
3299         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3300         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3301         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3302         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3303         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3304         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3305         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3306         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3307
3308         tmp = RREG32(HDP_MISC_CNTL);
3309         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3310         WREG32(HDP_MISC_CNTL, tmp);
3311
3312         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3313         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3314
3315         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3316
3317         udelay(50);
3318 }
3319
3320 /*
3321  * GPU scratch registers helper function.
3322  */
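/**
 * si_scratch_init - set up the CP scratch register bookkeeping
 *
 * @rdev: radeon_device pointer
 *
 * Records the seven CP scratch registers starting at SCRATCH_REG0
 * and marks them all as free (SI).
 */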
3323 static void si_scratch_init(struct radeon_device *rdev)
3324 {
3325         int i;
3326
3327         rdev->scratch.num_reg = 7;
3328         rdev->scratch.reg_base = SCRATCH_REG0;
3329         for (i = 0; i < rdev->scratch.num_reg; i++) {
3330                 rdev->scratch.free[i] = true;
3331                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3332         }
3333 }
3334
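/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the GPU caches and emits an EVENT_WRITE_EOP packet that
 * writes the fence sequence number and raises an interrupt (SI).
 */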
3335 void si_fence_ring_emit(struct radeon_device *rdev,
3336                         struct radeon_fence *fence)
3337 {
3338         struct radeon_ring *ring = &rdev->ring[fence->ring];
3339         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3340
3341         /* flush read cache over gart */
3342         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3343         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3344         radeon_ring_write(ring, 0);
3345         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3346         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3347                           PACKET3_TC_ACTION_ENA |
3348                           PACKET3_SH_KCACHE_ACTION_ENA |
3349                           PACKET3_SH_ICACHE_ACTION_ENA);
3350         radeon_ring_write(ring, 0xFFFFFFFF);
3351         radeon_ring_write(ring, 0);
3352         radeon_ring_write(ring, 10); /* poll interval */
3353         /* EVENT_WRITE_EOP - flush caches, send int */
3354         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3355         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3356         radeon_ring_write(ring, lower_32_bits(addr));
3357         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3358         radeon_ring_write(ring, fence->seq);
3359         radeon_ring_write(ring, 0);
3360 }
3361
3362 /*
3363  * IB stuff
3364  */
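/**
 * si_ring_ib_execute - emit an IB (Indirect Buffer) on the ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB on the
 * gfx ring and, for DE IBs, flushes the read caches for the IB's
 * vm id (SI).
 */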
3365 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3366 {
3367         struct radeon_ring *ring = &rdev->ring[ib->ring];
3368         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3369         u32 header;
3370
3371         if (ib->is_const_ib) {
3372                 /* set switch buffer packet before const IB */
3373                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3374                 radeon_ring_write(ring, 0);
3375
3376                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3377         } else {
3378                 u32 next_rptr;
3379                 if (ring->rptr_save_reg) {
3380                         next_rptr = ring->wptr + 3 + 4 + 8;
3381                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3382                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3383                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3384                         radeon_ring_write(ring, next_rptr);
3385                 } else if (rdev->wb.enabled) {
3386                         next_rptr = ring->wptr + 5 + 4 + 8;
3387                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3388                         radeon_ring_write(ring, (1 << 8));
3389                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3390                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3391                         radeon_ring_write(ring, next_rptr);
3392                 }
3393
3394                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3395         }
3396
3397         radeon_ring_write(ring, header);
3398         radeon_ring_write(ring,
3399 #ifdef __BIG_ENDIAN
3400                           (2 << 0) |
3401 #endif
3402                           (ib->gpu_addr & 0xFFFFFFFC));
3403         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3404         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3405
3406         if (!ib->is_const_ib) {
3407                 /* flush read cache over gart for this vmid */
3408                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3409                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3410                 radeon_ring_write(ring, vm_id);
3411                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3412                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3413                                   PACKET3_TC_ACTION_ENA |
3414                                   PACKET3_SH_KCACHE_ACTION_ENA |
3415                                   PACKET3_SH_ICACHE_ACTION_ENA);
3416                 radeon_ring_write(ring, 0xFFFFFFFF);
3417                 radeon_ring_write(ring, 0);
3418                 radeon_ring_write(ring, 10); /* poll interval */
3419         }
3420 }
3421
3422 /*
3423  * CP.
3424  */
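/**
 * si_cp_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the PFP, ME and CE; when halting, the gfx and
 * compute rings are marked not ready (SI).
 */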
3425 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3426 {
3427         if (enable)
3428                 WREG32(CP_ME_CNTL, 0);
3429         else {
3430                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3431                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3432                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3433                 WREG32(SCRATCH_UMSK, 0);
3434                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3435                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3436                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3437         }
3438         udelay(50);
3439 }
3440
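/**
 * si_cp_load_microcode - load the gfx CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the PFP, CE and ME microcode into the CP.
 * Returns 0 on success, -EINVAL if the firmware is not loaded (SI).
 */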
3441 static int si_cp_load_microcode(struct radeon_device *rdev)
3442 {
3443         int i;
3444
3445         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3446                 return -EINVAL;
3447
3448         si_cp_enable(rdev, false);
3449
3450         if (rdev->new_fw) {
3451                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3452                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3453                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3454                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3455                 const struct gfx_firmware_header_v1_0 *me_hdr =
3456                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3457                 const __le32 *fw_data;
3458                 u32 fw_size;
3459
3460                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3461                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3462                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3463
3464                 /* PFP */
3465                 fw_data = (const __le32 *)
3466                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3467                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3468                 WREG32(CP_PFP_UCODE_ADDR, 0);
3469                 for (i = 0; i < fw_size; i++)
3470                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3471                 WREG32(CP_PFP_UCODE_ADDR, 0);
3472
3473                 /* CE */
3474                 fw_data = (const __le32 *)
3475                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3476                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3477                 WREG32(CP_CE_UCODE_ADDR, 0);
3478                 for (i = 0; i < fw_size; i++)
3479                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3480                 WREG32(CP_CE_UCODE_ADDR, 0);
3481
3482                 /* ME */
3483                 fw_data = (const __le32 *)
3484                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3485                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3486                 WREG32(CP_ME_RAM_WADDR, 0);
3487                 for (i = 0; i < fw_size; i++)
3488                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3489                 WREG32(CP_ME_RAM_WADDR, 0);
3490         } else {
3491                 const __be32 *fw_data;
3492
3493                 /* PFP */
3494                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3495                 WREG32(CP_PFP_UCODE_ADDR, 0);
3496                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3497                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3498                 WREG32(CP_PFP_UCODE_ADDR, 0);
3499
3500                 /* CE */
3501                 fw_data = (const __be32 *)rdev->ce_fw->data;
3502                 WREG32(CP_CE_UCODE_ADDR, 0);
3503                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3504                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3505                 WREG32(CP_CE_UCODE_ADDR, 0);
3506
3507                 /* ME */
3508                 fw_data = (const __be32 *)rdev->me_fw->data;
3509                 WREG32(CP_ME_RAM_WADDR, 0);
3510                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3511                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3512                 WREG32(CP_ME_RAM_WADDR, 0);
3513         }
3514
3515         WREG32(CP_PFP_UCODE_ADDR, 0);
3516         WREG32(CP_CE_UCODE_ADDR, 0);
3517         WREG32(CP_ME_RAM_WADDR, 0);
3518         WREG32(CP_ME_RAM_RADDR, 0);
3519         return 0;
3520 }
3521
3522 static int si_cp_start(struct radeon_device *rdev)
3523 {
3524         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3525         int r, i;
3526
3527         r = radeon_ring_lock(rdev, ring, 7 + 4);
3528         if (r) {
3529                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3530                 return r;
3531         }
3532         /* init the CP */
3533         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3534         radeon_ring_write(ring, 0x1);
3535         radeon_ring_write(ring, 0x0);
3536         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3537         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3538         radeon_ring_write(ring, 0);
3539         radeon_ring_write(ring, 0);
3540
3541         /* init the CE partitions */
3542         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3543         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3544         radeon_ring_write(ring, 0xc000);
3545         radeon_ring_write(ring, 0xe000);
3546         radeon_ring_unlock_commit(rdev, ring, false);
3547
3548         si_cp_enable(rdev, true);
3549
3550         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3551         if (r) {
3552                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3553                 return r;
3554         }
3555
3556         /* setup clear context state */
3557         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3558         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3559
3560         for (i = 0; i < si_default_size; i++)
3561                 radeon_ring_write(ring, si_default_state[i]);
3562
3563         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3564         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3565
3566         /* set clear context state */
3567         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3568         radeon_ring_write(ring, 0);
3569
3570         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3571         radeon_ring_write(ring, 0x00000316);
3572         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3573         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3574
3575         radeon_ring_unlock_commit(rdev, ring, false);
3576
3577         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3578                 ring = &rdev->ring[i];
3579                 r = radeon_ring_lock(rdev, ring, 2);
                     if (r) {
                             DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                             return r;
                     }
3580
3581                 /* clear the compute context state */
3582                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3583                 radeon_ring_write(ring, 0);
3584
3585                 radeon_ring_unlock_commit(rdev, ring, false);
3586         }
3587
3588         return 0;
3589 }
3590
3591 static void si_cp_fini(struct radeon_device *rdev)
3592 {
3593         struct radeon_ring *ring;
3594         si_cp_enable(rdev, false);
3595
3596         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3597         radeon_ring_fini(rdev, ring);
3598         radeon_scratch_free(rdev, ring->rptr_save_reg);
3599
3600         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3601         radeon_ring_fini(rdev, ring);
3602         radeon_scratch_free(rdev, ring->rptr_save_reg);
3603
3604         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3605         radeon_ring_fini(rdev, ring);
3606         radeon_scratch_free(rdev, ring->rptr_save_reg);
3607 }
3608
3609 static int si_cp_resume(struct radeon_device *rdev)
3610 {
3611         struct radeon_ring *ring;
3612         u32 tmp;
3613         u32 rb_bufsz;
3614         int r;
3615
3616         si_enable_gui_idle_interrupt(rdev, false);
3617
3618         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3619         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3620
3621         /* Set the write pointer delay */
3622         WREG32(CP_RB_WPTR_DELAY, 0);
3623
3624         WREG32(CP_DEBUG, 0);
3625         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3626
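             /*
              * Each CP_RBn_CNTL below packs the ring size as a log2 value
              * in 8-byte units (rb_bufsz) into the low bits and a block
              * size derived from the GPU page size into bits 8+; big-endian
              * hosts additionally set BUF_SWAP_32BIT.
              */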
3627         /* ring 0 - compute and gfx */
3628         /* Set ring buffer size */
3629         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3630         rb_bufsz = order_base_2(ring->ring_size / 8);
3631         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3632 #ifdef __BIG_ENDIAN
3633         tmp |= BUF_SWAP_32BIT;
3634 #endif
3635         WREG32(CP_RB0_CNTL, tmp);
3636
3637         /* Initialize the ring buffer's read and write pointers */
3638         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3639         ring->wptr = 0;
3640         WREG32(CP_RB0_WPTR, ring->wptr);
3641
3642         /* set the wb address whether it's enabled or not */
3643         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3644         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3645
3646         if (rdev->wb.enabled)
3647                 WREG32(SCRATCH_UMSK, 0xff);
3648         else {
3649                 tmp |= RB_NO_UPDATE;
3650                 WREG32(SCRATCH_UMSK, 0);
3651         }
3652
3653         mdelay(1);
3654         WREG32(CP_RB0_CNTL, tmp);
3655
3656         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3657
3658         /* ring1  - compute only */
3659         /* Set ring buffer size */
3660         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3661         rb_bufsz = order_base_2(ring->ring_size / 8);
3662         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3663 #ifdef __BIG_ENDIAN
3664         tmp |= BUF_SWAP_32BIT;
3665 #endif
3666         WREG32(CP_RB1_CNTL, tmp);
3667
3668         /* Initialize the ring buffer's read and write pointers */
3669         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3670         ring->wptr = 0;
3671         WREG32(CP_RB1_WPTR, ring->wptr);
3672
3673         /* set the wb address whether it's enabled or not */
3674         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3675         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3676
3677         mdelay(1);
3678         WREG32(CP_RB1_CNTL, tmp);
3679
3680         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3681
3682         /* ring2 - compute only */
3683         /* Set ring buffer size */
3684         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3685         rb_bufsz = order_base_2(ring->ring_size / 8);
3686         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3687 #ifdef __BIG_ENDIAN
3688         tmp |= BUF_SWAP_32BIT;
3689 #endif
3690         WREG32(CP_RB2_CNTL, tmp);
3691
3692         /* Initialize the ring buffer's read and write pointers */
3693         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3694         ring->wptr = 0;
3695         WREG32(CP_RB2_WPTR, ring->wptr);
3696
3697         /* set the wb address whether it's enabled or not */
3698         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3699         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3700
3701         mdelay(1);
3702         WREG32(CP_RB2_CNTL, tmp);
3703
3704         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3705
3706         /* start the rings */
3707         si_cp_start(rdev);
3708         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3709         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3710         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3711         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3712         if (r) {
3713                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3714                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3715                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3716                 return r;
3717         }
3718         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3719         if (r) {
3720                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3721         }
3722         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3723         if (r) {
3724                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3725         }
3726
3727         si_enable_gui_idle_interrupt(rdev, true);
3728
3729         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3730                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3731
3732         return 0;
3733 }
3734
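     /**
      * si_gpu_check_soft_reset - check which blocks are busy or hung
      *
      * @rdev: radeon_device pointer
      *
      * Read the GRBM, SRBM, DMA and VM L2 status registers (plus the display
      * status) and build a mask of RADEON_RESET_* flags for the blocks that
      * appear hung (SI).
      */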
3735 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3736 {
3737         u32 reset_mask = 0;
3738         u32 tmp;
3739
3740         /* GRBM_STATUS */
3741         tmp = RREG32(GRBM_STATUS);
3742         if (tmp & (PA_BUSY | SC_BUSY |
3743                    BCI_BUSY | SX_BUSY |
3744                    TA_BUSY | VGT_BUSY |
3745                    DB_BUSY | CB_BUSY |
3746                    GDS_BUSY | SPI_BUSY |
3747                    IA_BUSY | IA_BUSY_NO_DMA))
3748                 reset_mask |= RADEON_RESET_GFX;
3749
3750         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3751                    CP_BUSY | CP_COHERENCY_BUSY))
3752                 reset_mask |= RADEON_RESET_CP;
3753
3754         if (tmp & GRBM_EE_BUSY)
3755                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3756
3757         /* GRBM_STATUS2 */
3758         tmp = RREG32(GRBM_STATUS2);
3759         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3760                 reset_mask |= RADEON_RESET_RLC;
3761
3762         /* DMA_STATUS_REG 0 */
3763         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3764         if (!(tmp & DMA_IDLE))
3765                 reset_mask |= RADEON_RESET_DMA;
3766
3767         /* DMA_STATUS_REG 1 */
3768         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3769         if (!(tmp & DMA_IDLE))
3770                 reset_mask |= RADEON_RESET_DMA1;
3771
3772         /* SRBM_STATUS2 */
3773         tmp = RREG32(SRBM_STATUS2);
3774         if (tmp & DMA_BUSY)
3775                 reset_mask |= RADEON_RESET_DMA;
3776
3777         if (tmp & DMA1_BUSY)
3778                 reset_mask |= RADEON_RESET_DMA1;
3779
3780         /* SRBM_STATUS */
3781         tmp = RREG32(SRBM_STATUS);
3782
3783         if (tmp & IH_BUSY)
3784                 reset_mask |= RADEON_RESET_IH;
3785
3786         if (tmp & SEM_BUSY)
3787                 reset_mask |= RADEON_RESET_SEM;
3788
3789         if (tmp & GRBM_RQ_PENDING)
3790                 reset_mask |= RADEON_RESET_GRBM;
3791
3792         if (tmp & VMC_BUSY)
3793                 reset_mask |= RADEON_RESET_VMC;
3794
3795         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3796                    MCC_BUSY | MCD_BUSY))
3797                 reset_mask |= RADEON_RESET_MC;
3798
3799         if (evergreen_is_display_hung(rdev))
3800                 reset_mask |= RADEON_RESET_DISPLAY;
3801
3802         /* VM_L2_STATUS */
3803         tmp = RREG32(VM_L2_STATUS);
3804         if (tmp & L2_BUSY)
3805                 reset_mask |= RADEON_RESET_VMC;
3806
3807         /* Skip MC reset as it's most likely not hung, just busy */
3808         if (reset_mask & RADEON_RESET_MC) {
3809                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3810                 reset_mask &= ~RADEON_RESET_MC;
3811         }
3812
3813         return reset_mask;
3814 }
3815
3816 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3817 {
3818         struct evergreen_mc_save save;
3819         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3820         u32 tmp;
3821
3822         if (reset_mask == 0)
3823                 return;
3824
3825         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3826
3827         evergreen_print_gpu_status_regs(rdev);
3828         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3829                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3830         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3831                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3832
3833         /* disable PG/CG */
3834         si_fini_pg(rdev);
3835         si_fini_cg(rdev);
3836
3837         /* stop the rlc */
3838         si_rlc_stop(rdev);
3839
3840         /* Disable CP parsing/prefetching */
3841         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3842
3843         if (reset_mask & RADEON_RESET_DMA) {
3844                 /* dma0 */
3845                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3846                 tmp &= ~DMA_RB_ENABLE;
3847                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3848         }
3849         if (reset_mask & RADEON_RESET_DMA1) {
3850                 /* dma1 */
3851                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3852                 tmp &= ~DMA_RB_ENABLE;
3853                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3854         }
3855
3856         udelay(50);
3857
3858         evergreen_mc_stop(rdev, &save);
3859         if (evergreen_mc_wait_for_idle(rdev)) {
3860                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3861         }
3862
3863         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3864                 grbm_soft_reset = SOFT_RESET_CB |
3865                         SOFT_RESET_DB |
3866                         SOFT_RESET_GDS |
3867                         SOFT_RESET_PA |
3868                         SOFT_RESET_SC |
3869                         SOFT_RESET_BCI |
3870                         SOFT_RESET_SPI |
3871                         SOFT_RESET_SX |
3872                         SOFT_RESET_TC |
3873                         SOFT_RESET_TA |
3874                         SOFT_RESET_VGT |
3875                         SOFT_RESET_IA;
3876         }
3877
3878         if (reset_mask & RADEON_RESET_CP) {
3879                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3880
3881                 srbm_soft_reset |= SOFT_RESET_GRBM;
3882         }
3883
3884         if (reset_mask & RADEON_RESET_DMA)
3885                 srbm_soft_reset |= SOFT_RESET_DMA;
3886
3887         if (reset_mask & RADEON_RESET_DMA1)
3888                 srbm_soft_reset |= SOFT_RESET_DMA1;
3889
3890         if (reset_mask & RADEON_RESET_DISPLAY)
3891                 srbm_soft_reset |= SOFT_RESET_DC;
3892
3893         if (reset_mask & RADEON_RESET_RLC)
3894                 grbm_soft_reset |= SOFT_RESET_RLC;
3895
3896         if (reset_mask & RADEON_RESET_SEM)
3897                 srbm_soft_reset |= SOFT_RESET_SEM;
3898
3899         if (reset_mask & RADEON_RESET_IH)
3900                 srbm_soft_reset |= SOFT_RESET_IH;
3901
3902         if (reset_mask & RADEON_RESET_GRBM)
3903                 srbm_soft_reset |= SOFT_RESET_GRBM;
3904
3905         if (reset_mask & RADEON_RESET_VMC)
3906                 srbm_soft_reset |= SOFT_RESET_VMC;
3907
3908         if (reset_mask & RADEON_RESET_MC)
3909                 srbm_soft_reset |= SOFT_RESET_MC;
3910
3911         if (grbm_soft_reset) {
3912                 tmp = RREG32(GRBM_SOFT_RESET);
3913                 tmp |= grbm_soft_reset;
3914                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3915                 WREG32(GRBM_SOFT_RESET, tmp);
3916                 tmp = RREG32(GRBM_SOFT_RESET);
3917
3918                 udelay(50);
3919
3920                 tmp &= ~grbm_soft_reset;
3921                 WREG32(GRBM_SOFT_RESET, tmp);
3922                 tmp = RREG32(GRBM_SOFT_RESET);
3923         }
3924
3925         if (srbm_soft_reset) {
3926                 tmp = RREG32(SRBM_SOFT_RESET);
3927                 tmp |= srbm_soft_reset;
3928                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3929                 WREG32(SRBM_SOFT_RESET, tmp);
3930                 tmp = RREG32(SRBM_SOFT_RESET);
3931
3932                 udelay(50);
3933
3934                 tmp &= ~srbm_soft_reset;
3935                 WREG32(SRBM_SOFT_RESET, tmp);
3936                 tmp = RREG32(SRBM_SOFT_RESET);
3937         }
3938
3939         /* Wait a little for things to settle down */
3940         udelay(50);
3941
3942         evergreen_mc_resume(rdev, &save);
3943         udelay(50);
3944
3945         evergreen_print_gpu_status_regs(rdev);
3946 }
3947
3948 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3949 {
3950         u32 tmp, i;
3951
3952         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3953         tmp |= SPLL_BYPASS_EN;
3954         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3955
3956         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3957         tmp |= SPLL_CTLREQ_CHG;
3958         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3959
3960         for (i = 0; i < rdev->usec_timeout; i++) {
3961                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3962                         break;
3963                 udelay(1);
3964         }
3965
3966         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3967         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3968         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3969
3970         tmp = RREG32(MPLL_CNTL_MODE);
3971         tmp &= ~MPLL_MCLK_SEL;
3972         WREG32(MPLL_CNTL_MODE, tmp);
3973 }
3974
3975 static void si_spll_powerdown(struct radeon_device *rdev)
3976 {
3977         u32 tmp;
3978
3979         tmp = RREG32(SPLL_CNTL_MODE);
3980         tmp |= SPLL_SW_DIR_CONTROL;
3981         WREG32(SPLL_CNTL_MODE, tmp);
3982
3983         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3984         tmp |= SPLL_RESET;
3985         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3986
3987         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3988         tmp |= SPLL_SLEEP;
3989         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3990
3991         tmp = RREG32(SPLL_CNTL_MODE);
3992         tmp &= ~SPLL_SW_DIR_CONTROL;
3993         WREG32(SPLL_CNTL_MODE, tmp);
3994 }
3995
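     /*
      * Full device reset through PCI config space: quiesce the CP, DMA
      * engines, RLC and MC, switch the clocks to bypass, power down the
      * SPLL and disable bus mastering, then trigger the reset and wait
      * for CONFIG_MEMSIZE to become readable again.
      */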
3996 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3997 {
3998         struct evergreen_mc_save save;
3999         u32 tmp, i;
4000
4001         dev_info(rdev->dev, "GPU pci config reset\n");
4002
4003         /* disable dpm? */
4004
4005         /* disable cg/pg */
4006         si_fini_pg(rdev);
4007         si_fini_cg(rdev);
4008
4009         /* Disable CP parsing/prefetching */
4010         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4011         /* dma0 */
4012         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4013         tmp &= ~DMA_RB_ENABLE;
4014         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4015         /* dma1 */
4016         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4017         tmp &= ~DMA_RB_ENABLE;
4018         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4019         /* XXX other engines? */
4020
4021         /* halt the rlc, disable cp internal ints */
4022         si_rlc_stop(rdev);
4023
4024         udelay(50);
4025
4026         /* disable mem access */
4027         evergreen_mc_stop(rdev, &save);
4028         if (evergreen_mc_wait_for_idle(rdev)) {
4029                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4030         }
4031
4032         /* set mclk/sclk to bypass */
4033         si_set_clk_bypass_mode(rdev);
4034         /* powerdown spll */
4035         si_spll_powerdown(rdev);
4036         /* disable BM */
4037         pci_clear_master(rdev->pdev);
4038         /* reset */
4039         radeon_pci_config_reset(rdev);
4040         /* wait for asic to come out of reset */
4041         for (i = 0; i < rdev->usec_timeout; i++) {
4042                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4043                         break;
4044                 udelay(1);
4045         }
4046 }
4047
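     /**
      * si_asic_reset - attempt to reset a hung GPU
      *
      * @rdev: radeon_device pointer
      *
      * Soft-reset the blocks reported busy by si_gpu_check_soft_reset(); if
      * some blocks are still hung afterwards and radeon_hard_reset is set,
      * fall back to a full PCI config reset (SI).
      * Returns 0.
      */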
4048 int si_asic_reset(struct radeon_device *rdev)
4049 {
4050         u32 reset_mask;
4051
4052         reset_mask = si_gpu_check_soft_reset(rdev);
4053
4054         if (reset_mask)
4055                 r600_set_bios_scratch_engine_hung(rdev, true);
4056
4057         /* try soft reset */
4058         si_gpu_soft_reset(rdev, reset_mask);
4059
4060         reset_mask = si_gpu_check_soft_reset(rdev);
4061
4062         /* try pci config reset */
4063         if (reset_mask && radeon_hard_reset)
4064                 si_gpu_pci_config_reset(rdev);
4065
4066         reset_mask = si_gpu_check_soft_reset(rdev);
4067
4068         if (!reset_mask)
4069                 r600_set_bios_scratch_engine_hung(rdev, false);
4070
4071         return 0;
4072 }
4073
4074 /**
4075  * si_gfx_is_lockup - Check if the GFX engine is locked up
4076  *
4077  * @rdev: radeon_device pointer
4078  * @ring: radeon_ring structure holding ring information
4079  *
4080  * Check if the GFX engine is locked up.
4081  * Returns true if the engine appears to be locked up, false if not.
4082  */
4083 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4084 {
4085         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4086
4087         if (!(reset_mask & (RADEON_RESET_GFX |
4088                             RADEON_RESET_COMPUTE |
4089                             RADEON_RESET_CP))) {
4090                 radeon_ring_lockup_update(rdev, ring);
4091                 return false;
4092         }
4093         return radeon_ring_test_lockup(rdev, ring);
4094 }
4095
4096 /* MC */
4097 static void si_mc_program(struct radeon_device *rdev)
4098 {
4099         struct evergreen_mc_save save;
4100         u32 tmp;
4101         int i, j;
4102
4103         /* Initialize HDP */
4104         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4105                 WREG32((0x2c14 + j), 0x00000000);
4106                 WREG32((0x2c18 + j), 0x00000000);
4107                 WREG32((0x2c1c + j), 0x00000000);
4108                 WREG32((0x2c20 + j), 0x00000000);
4109                 WREG32((0x2c24 + j), 0x00000000);
4110         }
4111         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4112
4113         evergreen_mc_stop(rdev, &save);
4114         if (radeon_mc_wait_for_idle(rdev)) {
4115                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4116         }
4117         if (!ASIC_IS_NODCE(rdev))
4118                 /* Lock out access through the VGA aperture */
4119                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4120         /* Update configuration */
4121         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4122                rdev->mc.vram_start >> 12);
4123         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4124                rdev->mc.vram_end >> 12);
4125         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4126                rdev->vram_scratch.gpu_addr >> 12);
4127         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4128         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4129         WREG32(MC_VM_FB_LOCATION, tmp);
4130         /* XXX double check these! */
4131         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4132         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4133         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4134         WREG32(MC_VM_AGP_BASE, 0);
4135         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4136         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4137         if (radeon_mc_wait_for_idle(rdev)) {
4138                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4139         }
4140         evergreen_mc_resume(rdev, &save);
4141         if (!ASIC_IS_NODCE(rdev)) {
4142                 /* we need to own VRAM, so turn off the VGA renderer here
4143                  * to stop it overwriting our objects */
4144                 rv515_vga_render_disable(rdev);
4145         }
4146 }
4147
4148 void si_vram_gtt_location(struct radeon_device *rdev,
4149                           struct radeon_mc *mc)
4150 {
4151         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4152                 /* leave room for at least 1024M GTT */
4153                 dev_warn(rdev->dev, "limiting VRAM\n");
4154                 mc->real_vram_size = 0xFFC0000000ULL;
4155                 mc->mc_vram_size = 0xFFC0000000ULL;
4156         }
4157         radeon_vram_location(rdev, &rdev->mc, 0);
4158         rdev->mc.gtt_base_align = 0;
4159         radeon_gtt_location(rdev, mc);
4160 }
4161
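     /*
      * Determine the VRAM width from the channel size and channel count
      * (MC_ARB_RAMCFG/MC_SHARED_CHMAP) and the VRAM size in MB from
      * CONFIG_MEMSIZE, then place VRAM and GTT in the GPU address space.
      */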
4162 static int si_mc_init(struct radeon_device *rdev)
4163 {
4164         u32 tmp;
4165         int chansize, numchan;
4166
4167         /* Get VRAM information */
4168         rdev->mc.vram_is_ddr = true;
4169         tmp = RREG32(MC_ARB_RAMCFG);
4170         if (tmp & CHANSIZE_OVERRIDE) {
4171                 chansize = 16;
4172         } else if (tmp & CHANSIZE_MASK) {
4173                 chansize = 64;
4174         } else {
4175                 chansize = 32;
4176         }
4177         tmp = RREG32(MC_SHARED_CHMAP);
4178         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4179         case 0:
4180         default:
4181                 numchan = 1;
4182                 break;
4183         case 1:
4184                 numchan = 2;
4185                 break;
4186         case 2:
4187                 numchan = 4;
4188                 break;
4189         case 3:
4190                 numchan = 8;
4191                 break;
4192         case 4:
4193                 numchan = 3;
4194                 break;
4195         case 5:
4196                 numchan = 6;
4197                 break;
4198         case 6:
4199                 numchan = 10;
4200                 break;
4201         case 7:
4202                 numchan = 12;
4203                 break;
4204         case 8:
4205                 numchan = 16;
4206                 break;
4207         }
4208         rdev->mc.vram_width = numchan * chansize;
4209         /* Could aper size report 0? */
4210         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4211         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4212         /* size in MB on si */
4213         tmp = RREG32(CONFIG_MEMSIZE);
4214         /* some boards may have garbage in the upper 16 bits */
4215         if (tmp & 0xffff0000) {
4216                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4217                 if (tmp & 0xffff)
4218                         tmp &= 0xffff;
4219         }
4220         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4221         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4222         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4223         si_vram_gtt_location(rdev, &rdev->mc);
4224         radeon_update_bandwidth_info(rdev);
4225
4226         return 0;
4227 }
4228
4229 /*
4230  * GART
4231  */
4232 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4233 {
4234         /* flush hdp cache */
4235         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4236
4237         /* bits 0-15 are the VM contexts 0-15 */
4238         WREG32(VM_INVALIDATE_REQUEST, 1);
4239 }
4240
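     /*
      * Program the MC VM: L1 TLB and L2 cache control, context 0 for the
      * GART (system pages) and contexts 1-15 for per-process VMs with
      * protection-fault interrupts enabled, then flush the TLBs.
      */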
4241 static int si_pcie_gart_enable(struct radeon_device *rdev)
4242 {
4243         int r, i;
4244
4245         if (rdev->gart.robj == NULL) {
4246                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4247                 return -EINVAL;
4248         }
4249         r = radeon_gart_table_vram_pin(rdev);
4250         if (r)
4251                 return r;
4252         /* Setup TLB control */
4253         WREG32(MC_VM_MX_L1_TLB_CNTL,
4254                (0xA << 7) |
4255                ENABLE_L1_TLB |
4256                ENABLE_L1_FRAGMENT_PROCESSING |
4257                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4258                ENABLE_ADVANCED_DRIVER_MODEL |
4259                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4260         /* Setup L2 cache */
4261         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4262                ENABLE_L2_FRAGMENT_PROCESSING |
4263                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4264                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4265                EFFECTIVE_L2_QUEUE_SIZE(7) |
4266                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4267         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4268         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4269                BANK_SELECT(4) |
4270                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4271         /* setup context0 */
4272         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4273         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4274         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4275         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4276                         (u32)(rdev->dummy_page.addr >> 12));
4277         WREG32(VM_CONTEXT0_CNTL2, 0);
4278         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4279                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4280
4281         WREG32(0x15D4, 0);
4282         WREG32(0x15D8, 0);
4283         WREG32(0x15DC, 0);
4284
4285         /* empty context1-15 */
4286         /* set vm size, must be a multiple of 4 */
4287         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4288         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4289         /* Assign the pt base to something valid for now; the pts used for
4290          * the VMs are determined by the application and set up and assigned
4291          * on the fly in the vm part of radeon_gart.c
4292          */
4293         for (i = 1; i < 16; i++) {
4294                 if (i < 8)
4295                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4296                                rdev->vm_manager.saved_table_addr[i]);
4297                 else
4298                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4299                                rdev->vm_manager.saved_table_addr[i]);
4300         }
4301
4302         /* enable context1-15 */
4303         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4304                (u32)(rdev->dummy_page.addr >> 12));
4305         WREG32(VM_CONTEXT1_CNTL2, 4);
4306         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4307                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4308                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4309                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4310                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4311                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4312                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4313                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4314                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4315                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4316                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4317                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4318                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4319                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4320
4321         si_pcie_gart_tlb_flush(rdev);
4322         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4323                  (unsigned)(rdev->mc.gtt_size >> 20),
4324                  (unsigned long long)rdev->gart.table_addr);
4325         rdev->gart.ready = true;
4326         return 0;
4327 }
4328
4329 static void si_pcie_gart_disable(struct radeon_device *rdev)
4330 {
4331         unsigned i;
4332
4333         for (i = 1; i < 16; ++i) {
4334                 uint32_t reg;
4335                 if (i < 8)
4336                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4337                 else
4338                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4339                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4340         }
4341
4342         /* Disable all tables */
4343         WREG32(VM_CONTEXT0_CNTL, 0);
4344         WREG32(VM_CONTEXT1_CNTL, 0);
4345         /* Setup TLB control */
4346         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4347                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4348         /* Setup L2 cache */
4349         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4350                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4351                EFFECTIVE_L2_QUEUE_SIZE(7) |
4352                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4353         WREG32(VM_L2_CNTL2, 0);
4354         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4355                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4356         radeon_gart_table_vram_unpin(rdev);
4357 }
4358
4359 static void si_pcie_gart_fini(struct radeon_device *rdev)
4360 {
4361         si_pcie_gart_disable(rdev);
4362         radeon_gart_table_vram_free(rdev);
4363         radeon_gart_fini(rdev);
4364 }
4365
4366 /* vm parser */
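     /*
      * Command-stream checking for IBs submitted with a VM:
      * si_vm_reg_valid() whitelists the registers a packet may touch, and
      * the packet3 checkers below restrict which PM4 opcodes are allowed
      * on the CE, GFX and compute rings.
      */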
4367 static bool si_vm_reg_valid(u32 reg)
4368 {
4369         /* context regs are fine */
4370         if (reg >= 0x28000)
4371                 return true;
4372
4373         /* check config regs */
4374         switch (reg) {
4375         case GRBM_GFX_INDEX:
4376         case CP_STRMOUT_CNTL:
4377         case VGT_VTX_VECT_EJECT_REG:
4378         case VGT_CACHE_INVALIDATION:
4379         case VGT_ESGS_RING_SIZE:
4380         case VGT_GSVS_RING_SIZE:
4381         case VGT_GS_VERTEX_REUSE:
4382         case VGT_PRIMITIVE_TYPE:
4383         case VGT_INDEX_TYPE:
4384         case VGT_NUM_INDICES:
4385         case VGT_NUM_INSTANCES:
4386         case VGT_TF_RING_SIZE:
4387         case VGT_HS_OFFCHIP_PARAM:
4388         case VGT_TF_MEMORY_BASE:
4389         case PA_CL_ENHANCE:
4390         case PA_SU_LINE_STIPPLE_VALUE:
4391         case PA_SC_LINE_STIPPLE_STATE:
4392         case PA_SC_ENHANCE:
4393         case SQC_CACHES:
4394         case SPI_STATIC_THREAD_MGMT_1:
4395         case SPI_STATIC_THREAD_MGMT_2:
4396         case SPI_STATIC_THREAD_MGMT_3:
4397         case SPI_PS_MAX_WAVE_ID:
4398         case SPI_CONFIG_CNTL:
4399         case SPI_CONFIG_CNTL_1:
4400         case TA_CNTL_AUX:
4401                 return true;
4402         default:
4403                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4404                 return false;
4405         }
4406 }
4407
4408 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4409                                   u32 *ib, struct radeon_cs_packet *pkt)
4410 {
4411         switch (pkt->opcode) {
4412         case PACKET3_NOP:
4413         case PACKET3_SET_BASE:
4414         case PACKET3_SET_CE_DE_COUNTERS:
4415         case PACKET3_LOAD_CONST_RAM:
4416         case PACKET3_WRITE_CONST_RAM:
4417         case PACKET3_WRITE_CONST_RAM_OFFSET:
4418         case PACKET3_DUMP_CONST_RAM:
4419         case PACKET3_INCREMENT_CE_COUNTER:
4420         case PACKET3_WAIT_ON_DE_COUNTER:
4421         case PACKET3_CE_WRITE:
4422                 break;
4423         default:
4424                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4425                 return -EINVAL;
4426         }
4427         return 0;
4428 }
4429
4430 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4431 {
4432         u32 start_reg, reg, i;
4433         u32 command = ib[idx + 4];
4434         u32 info = ib[idx + 1];
4435         u32 idx_value = ib[idx];
4436         if (command & PACKET3_CP_DMA_CMD_SAS) {
4437                 /* src address space is register */
4438                 if (((info & 0x60000000) >> 29) == 0) {
4439                         start_reg = idx_value << 2;
4440                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4441                                 reg = start_reg;
4442                                 if (!si_vm_reg_valid(reg)) {
4443                                         DRM_ERROR("CP DMA Bad SRC register\n");
4444                                         return -EINVAL;
4445                                 }
4446                         } else {
4447                                 for (i = 0; i < (command & 0x1fffff); i++) {
4448                                         reg = start_reg + (4 * i);
4449                                         if (!si_vm_reg_valid(reg)) {
4450                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4451                                                 return -EINVAL;
4452                                         }
4453                                 }
4454                         }
4455                 }
4456         }
4457         if (command & PACKET3_CP_DMA_CMD_DAS) {
4458                 /* dst address space is register */
4459                 if (((info & 0x00300000) >> 20) == 0) {
4460                         start_reg = ib[idx + 2];
4461                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4462                                 reg = start_reg;
4463                                 if (!si_vm_reg_valid(reg)) {
4464                                         DRM_ERROR("CP DMA Bad DST register\n");
4465                                         return -EINVAL;
4466                                 }
4467                         } else {
4468                                 for (i = 0; i < (command & 0x1fffff); i++) {
4469                                         reg = start_reg + (4 * i);
4470                                         if (!si_vm_reg_valid(reg)) {
4471                                                 DRM_ERROR("CP DMA Bad DST register\n");
4472                                                 return -EINVAL;
4473                                         }
4474                                 }
4475                         }
4476                 }
4477         }
4478         return 0;
4479 }
4480
4481 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4482                                    u32 *ib, struct radeon_cs_packet *pkt)
4483 {
4484         int r;
4485         u32 idx = pkt->idx + 1;
4486         u32 idx_value = ib[idx];
4487         u32 start_reg, end_reg, reg, i;
4488
4489         switch (pkt->opcode) {
4490         case PACKET3_NOP:
4491         case PACKET3_SET_BASE:
4492         case PACKET3_CLEAR_STATE:
4493         case PACKET3_INDEX_BUFFER_SIZE:
4494         case PACKET3_DISPATCH_DIRECT:
4495         case PACKET3_DISPATCH_INDIRECT:
4496         case PACKET3_ALLOC_GDS:
4497         case PACKET3_WRITE_GDS_RAM:
4498         case PACKET3_ATOMIC_GDS:
4499         case PACKET3_ATOMIC:
4500         case PACKET3_OCCLUSION_QUERY:
4501         case PACKET3_SET_PREDICATION:
4502         case PACKET3_COND_EXEC:
4503         case PACKET3_PRED_EXEC:
4504         case PACKET3_DRAW_INDIRECT:
4505         case PACKET3_DRAW_INDEX_INDIRECT:
4506         case PACKET3_INDEX_BASE:
4507         case PACKET3_DRAW_INDEX_2:
4508         case PACKET3_CONTEXT_CONTROL:
4509         case PACKET3_INDEX_TYPE:
4510         case PACKET3_DRAW_INDIRECT_MULTI:
4511         case PACKET3_DRAW_INDEX_AUTO:
4512         case PACKET3_DRAW_INDEX_IMMD:
4513         case PACKET3_NUM_INSTANCES:
4514         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4515         case PACKET3_STRMOUT_BUFFER_UPDATE:
4516         case PACKET3_DRAW_INDEX_OFFSET_2:
4517         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4518         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4519         case PACKET3_MPEG_INDEX:
4520         case PACKET3_WAIT_REG_MEM:
4521         case PACKET3_MEM_WRITE:
4522         case PACKET3_PFP_SYNC_ME:
4523         case PACKET3_SURFACE_SYNC:
4524         case PACKET3_EVENT_WRITE:
4525         case PACKET3_EVENT_WRITE_EOP:
4526         case PACKET3_EVENT_WRITE_EOS:
4527         case PACKET3_SET_CONTEXT_REG:
4528         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4529         case PACKET3_SET_SH_REG:
4530         case PACKET3_SET_SH_REG_OFFSET:
4531         case PACKET3_INCREMENT_DE_COUNTER:
4532         case PACKET3_WAIT_ON_CE_COUNTER:
4533         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4534         case PACKET3_ME_WRITE:
4535                 break;
4536         case PACKET3_COPY_DATA:
4537                 if ((idx_value & 0xf00) == 0) {
4538                         reg = ib[idx + 3] * 4;
4539                         if (!si_vm_reg_valid(reg))
4540                                 return -EINVAL;
4541                 }
4542                 break;
4543         case PACKET3_WRITE_DATA:
4544                 if ((idx_value & 0xf00) == 0) {
4545                         start_reg = ib[idx + 1] * 4;
4546                         if (idx_value & 0x10000) {
4547                                 if (!si_vm_reg_valid(start_reg))
4548                                         return -EINVAL;
4549                         } else {
4550                                 for (i = 0; i < (pkt->count - 2); i++) {
4551                                         reg = start_reg + (4 * i);
4552                                         if (!si_vm_reg_valid(reg))
4553                                                 return -EINVAL;
4554                                 }
4555                         }
4556                 }
4557                 break;
4558         case PACKET3_COND_WRITE:
4559                 if (idx_value & 0x100) {
4560                         reg = ib[idx + 5] * 4;
4561                         if (!si_vm_reg_valid(reg))
4562                                 return -EINVAL;
4563                 }
4564                 break;
4565         case PACKET3_COPY_DW:
4566                 if (idx_value & 0x2) {
4567                         reg = ib[idx + 3] * 4;
4568                         if (!si_vm_reg_valid(reg))
4569                                 return -EINVAL;
4570                 }
4571                 break;
4572         case PACKET3_SET_CONFIG_REG:
4573                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4574                 end_reg = 4 * pkt->count + start_reg - 4;
4575                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4576                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4577                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4578                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4579                         return -EINVAL;
4580                 }
4581                 for (i = 0; i < pkt->count; i++) {
4582                         reg = start_reg + (4 * i);
4583                         if (!si_vm_reg_valid(reg))
4584                                 return -EINVAL;
4585                 }
4586                 break;
4587         case PACKET3_CP_DMA:
4588                 r = si_vm_packet3_cp_dma_check(ib, idx);
4589                 if (r)
4590                         return r;
4591                 break;
4592         default:
4593                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4594                 return -EINVAL;
4595         }
4596         return 0;
4597 }
4598
4599 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4600                                        u32 *ib, struct radeon_cs_packet *pkt)
4601 {
4602         int r;
4603         u32 idx = pkt->idx + 1;
4604         u32 idx_value = ib[idx];
4605         u32 start_reg, reg, i;
4606
4607         switch (pkt->opcode) {
4608         case PACKET3_NOP:
4609         case PACKET3_SET_BASE:
4610         case PACKET3_CLEAR_STATE:
4611         case PACKET3_DISPATCH_DIRECT:
4612         case PACKET3_DISPATCH_INDIRECT:
4613         case PACKET3_ALLOC_GDS:
4614         case PACKET3_WRITE_GDS_RAM:
4615         case PACKET3_ATOMIC_GDS:
4616         case PACKET3_ATOMIC:
4617         case PACKET3_OCCLUSION_QUERY:
4618         case PACKET3_SET_PREDICATION:
4619         case PACKET3_COND_EXEC:
4620         case PACKET3_PRED_EXEC:
4621         case PACKET3_CONTEXT_CONTROL:
4622         case PACKET3_STRMOUT_BUFFER_UPDATE:
4623         case PACKET3_WAIT_REG_MEM:
4624         case PACKET3_MEM_WRITE:
4625         case PACKET3_PFP_SYNC_ME:
4626         case PACKET3_SURFACE_SYNC:
4627         case PACKET3_EVENT_WRITE:
4628         case PACKET3_EVENT_WRITE_EOP:
4629         case PACKET3_EVENT_WRITE_EOS:
4630         case PACKET3_SET_CONTEXT_REG:
4631         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4632         case PACKET3_SET_SH_REG:
4633         case PACKET3_SET_SH_REG_OFFSET:
4634         case PACKET3_INCREMENT_DE_COUNTER:
4635         case PACKET3_WAIT_ON_CE_COUNTER:
4636         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4637         case PACKET3_ME_WRITE:
4638                 break;
4639         case PACKET3_COPY_DATA:
4640                 if ((idx_value & 0xf00) == 0) {
4641                         reg = ib[idx + 3] * 4;
4642                         if (!si_vm_reg_valid(reg))
4643                                 return -EINVAL;
4644                 }
4645                 break;
4646         case PACKET3_WRITE_DATA:
4647                 if ((idx_value & 0xf00) == 0) {
4648                         start_reg = ib[idx + 1] * 4;
4649                         if (idx_value & 0x10000) {
4650                                 if (!si_vm_reg_valid(start_reg))
4651                                         return -EINVAL;
4652                         } else {
4653                                 for (i = 0; i < (pkt->count - 2); i++) {
4654                                         reg = start_reg + (4 * i);
4655                                         if (!si_vm_reg_valid(reg))
4656                                                 return -EINVAL;
4657                                 }
4658                         }
4659                 }
4660                 break;
4661         case PACKET3_COND_WRITE:
4662                 if (idx_value & 0x100) {
4663                         reg = ib[idx + 5] * 4;
4664                         if (!si_vm_reg_valid(reg))
4665                                 return -EINVAL;
4666                 }
4667                 break;
4668         case PACKET3_COPY_DW:
4669                 if (idx_value & 0x2) {
4670                         reg = ib[idx + 3] * 4;
4671                         if (!si_vm_reg_valid(reg))
4672                                 return -EINVAL;
4673                 }
4674                 break;
4675         case PACKET3_CP_DMA:
4676                 r = si_vm_packet3_cp_dma_check(ib, idx);
4677                 if (r)
4678                         return r;
4679                 break;
4680         default:
4681                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4682                 return -EINVAL;
4683         }
4684         return 0;
4685 }
4686
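     /**
      * si_ib_parse - validate an indirect buffer submitted with a VM
      *
      * @rdev: radeon_device pointer
      * @ib: radeon_ib to check
      *
      * Walk the IB, rejecting type-0 packets and any type-3 packet that is
      * not allowed for the ring it targets (CE, GFX or compute).
      * Returns 0 if the IB is valid, -EINVAL otherwise.
      */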
4687 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4688 {
4689         int ret = 0;
4690         u32 idx = 0, i;
4691         struct radeon_cs_packet pkt;
4692
4693         do {
4694                 pkt.idx = idx;
4695                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4696                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4697                 pkt.one_reg_wr = 0;
4698                 switch (pkt.type) {
4699                 case RADEON_PACKET_TYPE0:
4700                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4701                         for (i = 0; i < ib->length_dw; i++) {
4702                                 if (i == idx)
4703                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4704                                 else
4705                                         printk("\t0x%08x\n", ib->ptr[i]);
4706                         }
4707                         ret = -EINVAL;
4708                         break;
4709                 case RADEON_PACKET_TYPE2:
4710                         idx += 1;
4711                         break;
4712                 case RADEON_PACKET_TYPE3:
4713                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4714                         if (ib->is_const_ib)
4715                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4716                         else {
4717                                 switch (ib->ring) {
4718                                 case RADEON_RING_TYPE_GFX_INDEX:
4719                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4720                                         break;
4721                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4722                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4723                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4724                                         break;
4725                                 default:
4726                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4727                                         ret = -EINVAL;
4728                                         break;
4729                                 }
4730                         }
4731                         idx += pkt.count + 2;
4732                         break;
4733                 default:
4734                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4735                         ret = -EINVAL;
4736                         break;
4737                 }
4738                 if (ret)
4739                         break;
4740         } while (idx < ib->length_dw);
4741
4742         return ret;
4743 }
4744
4745 /*
4746  * vm
4747  */
4748 int si_vm_init(struct radeon_device *rdev)
4749 {
4750         /* number of VMs */
4751         rdev->vm_manager.nvm = 16;
4752         /* base offset of vram pages */
4753         rdev->vm_manager.vram_base_offset = 0;
4754
4755         return 0;
4756 }
4757
4758 void si_vm_fini(struct radeon_device *rdev)
4759 {
4760 }
4761
4762 /**
4763  * si_vm_decode_fault - print human readable fault info
4764  *
4765  * @rdev: radeon_device pointer
4766  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4767  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4768  *
4769  * Print human readable fault information (SI).
4770  */
4771 static void si_vm_decode_fault(struct radeon_device *rdev,
4772                                u32 status, u32 addr)
4773 {
4774         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4775         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4776         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4777         char *block;
4778
4779         if (rdev->family == CHIP_TAHITI) {
4780                 switch (mc_id) {
4781                 case 160:
4782                 case 144:
4783                 case 96:
4784                 case 80:
4785                 case 224:
4786                 case 208:
4787                 case 32:
4788                 case 16:
4789                         block = "CB";
4790                         break;
4791                 case 161:
4792                 case 145:
4793                 case 97:
4794                 case 81:
4795                 case 225:
4796                 case 209:
4797                 case 33:
4798                 case 17:
4799                         block = "CB_FMASK";
4800                         break;
4801                 case 162:
4802                 case 146:
4803                 case 98:
4804                 case 82:
4805                 case 226:
4806                 case 210:
4807                 case 34:
4808                 case 18:
4809                         block = "CB_CMASK";
4810                         break;
4811                 case 163:
4812                 case 147:
4813                 case 99:
4814                 case 83:
4815                 case 227:
4816                 case 211:
4817                 case 35:
4818                 case 19:
4819                         block = "CB_IMMED";
4820                         break;
4821                 case 164:
4822                 case 148:
4823                 case 100:
4824                 case 84:
4825                 case 228:
4826                 case 212:
4827                 case 36:
4828                 case 20:
4829                         block = "DB";
4830                         break;
4831                 case 165:
4832                 case 149:
4833                 case 101:
4834                 case 85:
4835                 case 229:
4836                 case 213:
4837                 case 37:
4838                 case 21:
4839                         block = "DB_HTILE";
4840                         break;
4841                 case 167:
4842                 case 151:
4843                 case 103:
4844                 case 87:
4845                 case 231:
4846                 case 215:
4847                 case 39:
4848                 case 23:
4849                         block = "DB_STEN";
4850                         break;
4851                 case 72:
4852                 case 68:
4853                 case 64:
4854                 case 8:
4855                 case 4:
4856                 case 0:
4857                 case 136:
4858                 case 132:
4859                 case 128:
4860                 case 200:
4861                 case 196:
4862                 case 192:
4863                         block = "TC";
4864                         break;
4865                 case 112:
4866                 case 48:
4867                         block = "CP";
4868                         break;
4869                 case 49:
4870                 case 177:
4871                 case 50:
4872                 case 178:
4873                         block = "SH";
4874                         break;
4875                 case 53:
4876                 case 190:
4877                         block = "VGT";
4878                         break;
4879                 case 117:
4880                         block = "IH";
4881                         break;
4882                 case 51:
4883                 case 115:
4884                         block = "RLC";
4885                         break;
4886                 case 119:
4887                 case 183:
4888                         block = "DMA0";
4889                         break;
4890                 case 61:
4891                         block = "DMA1";
4892                         break;
4893                 case 248:
4894                 case 120:
4895                         block = "HDP";
4896                         break;
4897                 default:
4898                         block = "unknown";
4899                         break;
4900                 }
4901         } else {
4902                 switch (mc_id) {
4903                 case 32:
4904                 case 16:
4905                 case 96:
4906                 case 80:
4907                 case 160:
4908                 case 144:
4909                 case 224:
4910                 case 208:
4911                         block = "CB";
4912                         break;
4913                 case 33:
4914                 case 17:
4915                 case 97:
4916                 case 81:
4917                 case 161:
4918                 case 145:
4919                 case 225:
4920                 case 209:
4921                         block = "CB_FMASK";
4922                         break;
4923                 case 34:
4924                 case 18:
4925                 case 98:
4926                 case 82:
4927                 case 162:
4928                 case 146:
4929                 case 226:
4930                 case 210:
4931                         block = "CB_CMASK";
4932                         break;
4933                 case 35:
4934                 case 19:
4935                 case 99:
4936                 case 83:
4937                 case 163:
4938                 case 147:
4939                 case 227:
4940                 case 211:
4941                         block = "CB_IMMED";
4942                         break;
4943                 case 36:
4944                 case 20:
4945                 case 100:
4946                 case 84:
4947                 case 164:
4948                 case 148:
4949                 case 228:
4950                 case 212:
4951                         block = "DB";
4952                         break;
4953                 case 37:
4954                 case 21:
4955                 case 101:
4956                 case 85:
4957                 case 165:
4958                 case 149:
4959                 case 229:
4960                 case 213:
4961                         block = "DB_HTILE";
4962                         break;
4963                 case 39:
4964                 case 23:
4965                 case 103:
4966                 case 87:
4967                 case 167:
4968                 case 151:
4969                 case 231:
4970                 case 215:
4971                         block = "DB_STEN";
4972                         break;
4973                 case 72:
4974                 case 68:
4975                 case 8:
4976                 case 4:
4977                 case 136:
4978                 case 132:
4979                 case 200:
4980                 case 196:
4981                         block = "TC";
4982                         break;
4983                 case 112:
4984                 case 48:
4985                         block = "CP";
4986                         break;
4987                 case 49:
4988                 case 177:
4989                 case 50:
4990                 case 178:
4991                         block = "SH";
4992                         break;
4993                 case 53:
4994                         block = "VGT";
4995                         break;
4996                 case 117:
4997                         block = "IH";
4998                         break;
4999                 case 51:
5000                 case 115:
5001                         block = "RLC";
5002                         break;
5003                 case 119:
5004                 case 183:
5005                         block = "DMA0";
5006                         break;
5007                 case 61:
5008                         block = "DMA1";
5009                         break;
5010                 case 248:
5011                 case 120:
5012                         block = "HDP";
5013                         break;
5014                 default:
5015                         block = "unknown";
5016                         break;
5017                 }
5018         }
5019
5020         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5021                protections, vmid, addr,
5022                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5023                block, mc_id);
5024 }
5025
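/*
 * si_vm_flush - hardware VM flush via the graphics ring
 *
 * Emits the packet sequence needed to switch VM context vm_id to the page
 * directory at pd_addr: write the per-context page table base register,
 * flush the HDP cache, request an invalidate of that VM context, wait for
 * the invalidate to complete, and sync the PFP to the ME so the prefetcher
 * does not use stale translations (see the per-packet comments below).
 */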
5026 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5027                  unsigned vm_id, uint64_t pd_addr)
5028 {
5029         /* write new base address */
5030         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5031         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5032                                  WRITE_DATA_DST_SEL(0)));
5033
5034         if (vm_id < 8) {
5035                 radeon_ring_write(ring,
5036                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5037         } else {
5038                 radeon_ring_write(ring,
5039                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5040         }
5041         radeon_ring_write(ring, 0);
5042         radeon_ring_write(ring, pd_addr >> 12);
5043
5044         /* flush hdp cache */
5045         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5046         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5047                                  WRITE_DATA_DST_SEL(0)));
5048         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5049         radeon_ring_write(ring, 0);
5050         radeon_ring_write(ring, 0x1);
5051
5052         /* bits 0-15 are the VM contexts 0-15 */
5053         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5054         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5055                                  WRITE_DATA_DST_SEL(0)));
5056         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5057         radeon_ring_write(ring, 0);
5058         radeon_ring_write(ring, 1 << vm_id);
5059
5060         /* wait for the invalidate to complete */
5061         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5062         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5063                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5064         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5065         radeon_ring_write(ring, 0);
5066         radeon_ring_write(ring, 0); /* ref */
5067         radeon_ring_write(ring, 0); /* mask */
5068         radeon_ring_write(ring, 0x20); /* poll interval */
5069
5070         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5071         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5072         radeon_ring_write(ring, 0x0);
5073 }
5074
5075 /*
5076  *  Power and clock gating
5077  */
5078 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5079 {
5080         int i;
5081
5082         for (i = 0; i < rdev->usec_timeout; i++) {
5083                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5084                         break;
5085                 udelay(1);
5086         }
5087
5088         for (i = 0; i < rdev->usec_timeout; i++) {
5089                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5090                         break;
5091                 udelay(1);
5092         }
5093 }
5094
5095 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5096                                          bool enable)
5097 {
5098         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5099         u32 mask;
5100         int i;
5101
5102         if (enable)
5103                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5104         else
5105                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5106         WREG32(CP_INT_CNTL_RING0, tmp);
5107
5108         if (!enable) {
5109                 /* read a gfx register */
5110                 tmp = RREG32(DB_DEPTH_INFO);
5111
5112                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5113                 for (i = 0; i < rdev->usec_timeout; i++) {
5114                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5115                                 break;
5116                         udelay(1);
5117                 }
5118         }
5119 }
5120
5121 static void si_set_uvd_dcm(struct radeon_device *rdev,
5122                            bool sw_mode)
5123 {
5124         u32 tmp, tmp2;
5125
5126         tmp = RREG32(UVD_CGC_CTRL);
5127         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5128         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5129
5130         if (sw_mode) {
5131                 tmp &= ~0x7ffff800;
5132                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5133         } else {
5134                 tmp |= 0x7ffff800;
5135                 tmp2 = 0;
5136         }
5137
5138         WREG32(UVD_CGC_CTRL, tmp);
5139         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5140 }
5141
5142 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5143 {
5144         bool hw_mode = true;
5145
5146         if (hw_mode) {
5147                 si_set_uvd_dcm(rdev, false);
5148         } else {
5149                 u32 tmp = RREG32(UVD_CGC_CTRL);
5150                 tmp &= ~DCM;
5151                 WREG32(UVD_CGC_CTRL, tmp);
5152         }
5153 }
5154
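/*
 * si_halt_rlc - temporarily disable the RLC
 *
 * Clears RLC_ENABLE (if set) and waits for the RLC serdes masters to go
 * idle.  Returns the previous RLC_CNTL value so the caller can restore it
 * with si_update_rlc() once the serdes writes are done.
 */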
5155 static u32 si_halt_rlc(struct radeon_device *rdev)
5156 {
5157         u32 data, orig;
5158
5159         orig = data = RREG32(RLC_CNTL);
5160
5161         if (data & RLC_ENABLE) {
5162                 data &= ~RLC_ENABLE;
5163                 WREG32(RLC_CNTL, data);
5164
5165                 si_wait_for_rlc_serdes(rdev);
5166         }
5167
5168         return orig;
5169 }
5170
5171 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5172 {
5173         u32 tmp;
5174
5175         tmp = RREG32(RLC_CNTL);
5176         if (tmp != rlc)
5177                 WREG32(RLC_CNTL, rlc);
5178 }
5179
5180 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5181 {
5182         u32 data, orig;
5183
5184         orig = data = RREG32(DMA_PG);
5185         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5186                 data |= PG_CNTL_ENABLE;
5187         else
5188                 data &= ~PG_CNTL_ENABLE;
5189         if (orig != data)
5190                 WREG32(DMA_PG, data);
5191 }
5192
5193 static void si_init_dma_pg(struct radeon_device *rdev)
5194 {
5195         u32 tmp;
5196
5197         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5198         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5199
5200         for (tmp = 0; tmp < 5; tmp++)
5201                 WREG32(DMA_PGFSM_WRITE, 0);
5202 }
5203
5204 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5205                                bool enable)
5206 {
5207         u32 tmp;
5208
5209         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5210                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5211                 WREG32(RLC_TTOP_D, tmp);
5212
5213                 tmp = RREG32(RLC_PG_CNTL);
5214                 tmp |= GFX_PG_ENABLE;
5215                 WREG32(RLC_PG_CNTL, tmp);
5216
5217                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5218                 tmp |= AUTO_PG_EN;
5219                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5220         } else {
5221                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5222                 tmp &= ~AUTO_PG_EN;
5223                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5224
5225                 tmp = RREG32(DB_RENDER_CONTROL);
5226         }
5227 }
5228
5229 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5230 {
5231         u32 tmp;
5232
5233         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5234
5235         tmp = RREG32(RLC_PG_CNTL);
5236         tmp |= GFX_PG_SRC;
5237         WREG32(RLC_PG_CNTL, tmp);
5238
5239         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5240
5241         tmp = RREG32(RLC_AUTO_PG_CTRL);
5242
5243         tmp &= ~GRBM_REG_SGIT_MASK;
5244         tmp |= GRBM_REG_SGIT(0x700);
5245         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5246         WREG32(RLC_AUTO_PG_CTRL, tmp);
5247 }
5248
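/*
 * si_get_cu_active_bitmap - active CU mask for one shader engine/array
 *
 * ORs together the upper 16 bits (the disabled-CU fields) of
 * CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG for the
 * selected SE/SH, then inverts the result and masks it down to
 * max_cu_per_sh bits, yielding one bit set per active compute unit.
 */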
5249 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5250 {
5251         u32 mask = 0, tmp, tmp1;
5252         int i;
5253
5254         si_select_se_sh(rdev, se, sh);
5255         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5256         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5257         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5258
5259         tmp &= 0xffff0000;
5260
5261         tmp |= tmp1;
5262         tmp >>= 16;
5263
5264         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5265                 mask <<= 1;
5266                 mask |= 1;
5267         }
5268
5269         return (~tmp) & mask;
5270 }
5271
5272 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5273 {
5274         u32 i, j, k, active_cu_number = 0;
5275         u32 mask, counter, cu_bitmap;
5276         u32 tmp = 0;
5277
5278         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5279                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5280                         mask = 1;
5281                         cu_bitmap = 0;
5282                         counter  = 0;
5283                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5284                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5285                                         if (counter < 2)
5286                                                 cu_bitmap |= mask;
5287                                         counter++;
5288                                 }
5289                                 mask <<= 1;
5290                         }
5291
5292                         active_cu_number += counter;
5293                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5294                 }
5295         }
5296
5297         WREG32(RLC_PG_AO_CU_MASK, tmp);
5298
5299         tmp = RREG32(RLC_MAX_PG_CU);
5300         tmp &= ~MAX_PU_CU_MASK;
5301         tmp |= MAX_PU_CU(active_cu_number);
5302         WREG32(RLC_MAX_PG_CU, tmp);
5303 }
5304
5305 static void si_enable_cgcg(struct radeon_device *rdev,
5306                            bool enable)
5307 {
5308         u32 data, orig, tmp;
5309
5310         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5311
5312         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5313                 si_enable_gui_idle_interrupt(rdev, true);
5314
5315                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5316
5317                 tmp = si_halt_rlc(rdev);
5318
5319                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5320                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5321                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5322
5323                 si_wait_for_rlc_serdes(rdev);
5324
5325                 si_update_rlc(rdev, tmp);
5326
5327                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5328
5329                 data |= CGCG_EN | CGLS_EN;
5330         } else {
5331                 si_enable_gui_idle_interrupt(rdev, false);
5332
5333                 RREG32(CB_CGTT_SCLK_CTRL);
5334                 RREG32(CB_CGTT_SCLK_CTRL);
5335                 RREG32(CB_CGTT_SCLK_CTRL);
5336                 RREG32(CB_CGTT_SCLK_CTRL);
5337
5338                 data &= ~(CGCG_EN | CGLS_EN);
5339         }
5340
5341         if (orig != data)
5342                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5343 }
5344
5345 static void si_enable_mgcg(struct radeon_device *rdev,
5346                            bool enable)
5347 {
5348         u32 data, orig, tmp = 0;
5349
5350         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5351                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5352                 data = 0x96940200;
5353                 if (orig != data)
5354                         WREG32(CGTS_SM_CTRL_REG, data);
5355
5356                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5357                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5358                         data |= CP_MEM_LS_EN;
5359                         if (orig != data)
5360                                 WREG32(CP_MEM_SLP_CNTL, data);
5361                 }
5362
5363                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5364                 data &= 0xffffffc0;
5365                 if (orig != data)
5366                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5367
5368                 tmp = si_halt_rlc(rdev);
5369
5370                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5371                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5372                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5373
5374                 si_update_rlc(rdev, tmp);
5375         } else {
5376                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5377                 data |= 0x00000003;
5378                 if (orig != data)
5379                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5380
5381                 data = RREG32(CP_MEM_SLP_CNTL);
5382                 if (data & CP_MEM_LS_EN) {
5383                         data &= ~CP_MEM_LS_EN;
5384                         WREG32(CP_MEM_SLP_CNTL, data);
5385                 }
5386                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5387                 data |= LS_OVERRIDE | OVERRIDE;
5388                 if (orig != data)
5389                         WREG32(CGTS_SM_CTRL_REG, data);
5390
5391                 tmp = si_halt_rlc(rdev);
5392
5393                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5394                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5395                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5396
5397                 si_update_rlc(rdev, tmp);
5398         }
5399 }
5400
5401 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5402                                bool enable)
5403 {
5404         u32 orig, data, tmp;
5405
5406         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5407                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5408                 tmp |= 0x3fff;
5409                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5410
5411                 orig = data = RREG32(UVD_CGC_CTRL);
5412                 data |= DCM;
5413                 if (orig != data)
5414                         WREG32(UVD_CGC_CTRL, data);
5415
5416                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5417                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5418         } else {
5419                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5420                 tmp &= ~0x3fff;
5421                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5422
5423                 orig = data = RREG32(UVD_CGC_CTRL);
5424                 data &= ~DCM;
5425                 if (orig != data)
5426                         WREG32(UVD_CGC_CTRL, data);
5427
5428                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5429                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5430         }
5431 }
5432
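/*
 * MC hub, ATC and VM L2 clock gating control registers.  si_enable_mc_ls()
 * and si_enable_mc_mgcg() below walk this list and toggle the MC_LS_ENABLE
 * and MC_CG_ENABLE bits, respectively, in each register.
 */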
5433 static const u32 mc_cg_registers[] =
5434 {
5435         MC_HUB_MISC_HUB_CG,
5436         MC_HUB_MISC_SIP_CG,
5437         MC_HUB_MISC_VM_CG,
5438         MC_XPB_CLK_GAT,
5439         ATC_MISC_CG,
5440         MC_CITF_MISC_WR_CG,
5441         MC_CITF_MISC_RD_CG,
5442         MC_CITF_MISC_VM_CG,
5443         VM_L2_CG,
5444 };
5445
5446 static void si_enable_mc_ls(struct radeon_device *rdev,
5447                             bool enable)
5448 {
5449         int i;
5450         u32 orig, data;
5451
5452         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5453                 orig = data = RREG32(mc_cg_registers[i]);
5454                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5455                         data |= MC_LS_ENABLE;
5456                 else
5457                         data &= ~MC_LS_ENABLE;
5458                 if (data != orig)
5459                         WREG32(mc_cg_registers[i], data);
5460         }
5461 }
5462
5463 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5464                                bool enable)
5465 {
5466         int i;
5467         u32 orig, data;
5468
5469         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5470                 orig = data = RREG32(mc_cg_registers[i]);
5471                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5472                         data |= MC_CG_ENABLE;
5473                 else
5474                         data &= ~MC_CG_ENABLE;
5475                 if (data != orig)
5476                         WREG32(mc_cg_registers[i], data);
5477         }
5478 }
5479
5480 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5481                                bool enable)
5482 {
5483         u32 orig, data, offset;
5484         int i;
5485
5486         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5487                 for (i = 0; i < 2; i++) {
5488                         if (i == 0)
5489                                 offset = DMA0_REGISTER_OFFSET;
5490                         else
5491                                 offset = DMA1_REGISTER_OFFSET;
5492                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5493                         data &= ~MEM_POWER_OVERRIDE;
5494                         if (data != orig)
5495                                 WREG32(DMA_POWER_CNTL + offset, data);
5496                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5497                 }
5498         } else {
5499                 for (i = 0; i < 2; i++) {
5500                         if (i == 0)
5501                                 offset = DMA0_REGISTER_OFFSET;
5502                         else
5503                                 offset = DMA1_REGISTER_OFFSET;
5504                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5505                         data |= MEM_POWER_OVERRIDE;
5506                         if (data != orig)
5507                                 WREG32(DMA_POWER_CNTL + offset, data);
5508
5509                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5510                         data = 0xff000000;
5511                         if (data != orig)
5512                                 WREG32(DMA_CLK_CTRL + offset, data);
5513                 }
5514         }
5515 }
5516
5517 static void si_enable_bif_mgls(struct radeon_device *rdev,
5518                                bool enable)
5519 {
5520         u32 orig, data;
5521
5522         orig = data = RREG32_PCIE(PCIE_CNTL2);
5523
5524         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5525                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5526                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5527         else
5528                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5529                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5530
5531         if (orig != data)
5532                 WREG32_PCIE(PCIE_CNTL2, data);
5533 }
5534
5535 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5536                                bool enable)
5537 {
5538         u32 orig, data;
5539
5540         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5541
5542         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5543                 data &= ~CLOCK_GATING_DIS;
5544         else
5545                 data |= CLOCK_GATING_DIS;
5546
5547         if (orig != data)
5548                 WREG32(HDP_HOST_PATH_CNTL, data);
5549 }
5550
5551 static void si_enable_hdp_ls(struct radeon_device *rdev,
5552                              bool enable)
5553 {
5554         u32 orig, data;
5555
5556         orig = data = RREG32(HDP_MEM_POWER_LS);
5557
5558         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5559                 data |= HDP_LS_ENABLE;
5560         else
5561                 data &= ~HDP_LS_ENABLE;
5562
5563         if (orig != data)
5564                 WREG32(HDP_MEM_POWER_LS, data);
5565 }
5566
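/*
 * si_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * Dispatches to the per-block helpers above based on the block mask.
 * For GFX the GUI idle interrupt is masked while the state changes and,
 * as noted below, the ordering matters: MGCG is enabled before CGCG and
 * disabled after it.
 */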
5567 static void si_update_cg(struct radeon_device *rdev,
5568                          u32 block, bool enable)
5569 {
5570         if (block & RADEON_CG_BLOCK_GFX) {
5571                 si_enable_gui_idle_interrupt(rdev, false);
5572                 /* order matters! */
5573                 if (enable) {
5574                         si_enable_mgcg(rdev, true);
5575                         si_enable_cgcg(rdev, true);
5576                 } else {
5577                         si_enable_cgcg(rdev, false);
5578                         si_enable_mgcg(rdev, false);
5579                 }
5580                 si_enable_gui_idle_interrupt(rdev, true);
5581         }
5582
5583         if (block & RADEON_CG_BLOCK_MC) {
5584                 si_enable_mc_mgcg(rdev, enable);
5585                 si_enable_mc_ls(rdev, enable);
5586         }
5587
5588         if (block & RADEON_CG_BLOCK_SDMA) {
5589                 si_enable_dma_mgcg(rdev, enable);
5590         }
5591
5592         if (block & RADEON_CG_BLOCK_BIF) {
5593                 si_enable_bif_mgls(rdev, enable);
5594         }
5595
5596         if (block & RADEON_CG_BLOCK_UVD) {
5597                 if (rdev->has_uvd) {
5598                         si_enable_uvd_mgcg(rdev, enable);
5599                 }
5600         }
5601
5602         if (block & RADEON_CG_BLOCK_HDP) {
5603                 si_enable_hdp_mgcg(rdev, enable);
5604                 si_enable_hdp_ls(rdev, enable);
5605         }
5606 }
5607
5608 static void si_init_cg(struct radeon_device *rdev)
5609 {
5610         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5611                             RADEON_CG_BLOCK_MC |
5612                             RADEON_CG_BLOCK_SDMA |
5613                             RADEON_CG_BLOCK_BIF |
5614                             RADEON_CG_BLOCK_HDP), true);
5615         if (rdev->has_uvd) {
5616                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5617                 si_init_uvd_internal_cg(rdev);
5618         }
5619 }
5620
5621 static void si_fini_cg(struct radeon_device *rdev)
5622 {
5623         if (rdev->has_uvd) {
5624                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5625         }
5626         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5627                             RADEON_CG_BLOCK_MC |
5628                             RADEON_CG_BLOCK_SDMA |
5629                             RADEON_CG_BLOCK_BIF |
5630                             RADEON_CG_BLOCK_HDP), false);
5631 }
5632
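/*
 * si_get_csb_size - size of the clear state buffer in dwords
 *
 * Counts the preamble begin/end packets, the context control packet, one
 * SET_CONTEXT_REG packet (header + offset + registers) per SECT_CONTEXT
 * extent in rdev->rlc.cs_data, the PA_SC_RASTER_CONFIG write and the final
 * CLEAR_STATE packet.  si_get_csb_buffer() below emits the matching stream.
 */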
5633 u32 si_get_csb_size(struct radeon_device *rdev)
5634 {
5635         u32 count = 0;
5636         const struct cs_section_def *sect = NULL;
5637         const struct cs_extent_def *ext = NULL;
5638
5639         if (rdev->rlc.cs_data == NULL)
5640                 return 0;
5641
5642         /* begin clear state */
5643         count += 2;
5644         /* context control state */
5645         count += 3;
5646
5647         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5648                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5649                         if (sect->id == SECT_CONTEXT)
5650                                 count += 2 + ext->reg_count;
5651                         else
5652                                 return 0;
5653                 }
5654         }
5655         /* pa_sc_raster_config */
5656         count += 3;
5657         /* end clear state */
5658         count += 2;
5659         /* clear state */
5660         count += 2;
5661
5662         return count;
5663 }
5664
5665 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5666 {
5667         u32 count = 0, i;
5668         const struct cs_section_def *sect = NULL;
5669         const struct cs_extent_def *ext = NULL;
5670
5671         if (rdev->rlc.cs_data == NULL)
5672                 return;
5673         if (buffer == NULL)
5674                 return;
5675
5676         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5677         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5678
5679         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5680         buffer[count++] = cpu_to_le32(0x80000000);
5681         buffer[count++] = cpu_to_le32(0x80000000);
5682
5683         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5684                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5685                         if (sect->id == SECT_CONTEXT) {
5686                                 buffer[count++] =
5687                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5688                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5689                                 for (i = 0; i < ext->reg_count; i++)
5690                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5691                         } else {
5692                                 return;
5693                         }
5694                 }
5695         }
5696
5697         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5698         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5699         switch (rdev->family) {
5700         case CHIP_TAHITI:
5701         case CHIP_PITCAIRN:
5702                 buffer[count++] = cpu_to_le32(0x2a00126a);
5703                 break;
5704         case CHIP_VERDE:
5705                 buffer[count++] = cpu_to_le32(0x0000124a);
5706                 break;
5707         case CHIP_OLAND:
5708                 buffer[count++] = cpu_to_le32(0x00000082);
5709                 break;
5710         case CHIP_HAINAN:
5711                 buffer[count++] = cpu_to_le32(0x00000000);
5712                 break;
5713         default:
5714                 buffer[count++] = cpu_to_le32(0x00000000);
5715                 break;
5716         }
5717
5718         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5719         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5720
5721         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5722         buffer[count++] = cpu_to_le32(0);
5723 }
5724
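/*
 * si_init_pg - initialize power gating
 *
 * Sets up DMA and GFX power gating according to rdev->pg_flags.  The RLC
 * save/restore and clear state buffer addresses are programmed in every
 * case, even when power gating is not supported.
 */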
5725 static void si_init_pg(struct radeon_device *rdev)
5726 {
5727         if (rdev->pg_flags) {
5728                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5729                         si_init_dma_pg(rdev);
5730                 }
5731                 si_init_ao_cu_mask(rdev);
5732                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5733                         si_init_gfx_cgpg(rdev);
5734                 } else {
5735                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5736                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5737                 }
5738                 si_enable_dma_pg(rdev, true);
5739                 si_enable_gfx_cgpg(rdev, true);
5740         } else {
5741                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5742                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5743         }
5744 }
5745
5746 static void si_fini_pg(struct radeon_device *rdev)
5747 {
5748         if (rdev->pg_flags) {
5749                 si_enable_dma_pg(rdev, false);
5750                 si_enable_gfx_cgpg(rdev, false);
5751         }
5752 }
5753
5754 /*
5755  * RLC
5756  */
5757 void si_rlc_reset(struct radeon_device *rdev)
5758 {
5759         u32 tmp = RREG32(GRBM_SOFT_RESET);
5760
5761         tmp |= SOFT_RESET_RLC;
5762         WREG32(GRBM_SOFT_RESET, tmp);
5763         udelay(50);
5764         tmp &= ~SOFT_RESET_RLC;
5765         WREG32(GRBM_SOFT_RESET, tmp);
5766         udelay(50);
5767 }
5768
5769 static void si_rlc_stop(struct radeon_device *rdev)
5770 {
5771         WREG32(RLC_CNTL, 0);
5772
5773         si_enable_gui_idle_interrupt(rdev, false);
5774
5775         si_wait_for_rlc_serdes(rdev);
5776 }
5777
5778 static void si_rlc_start(struct radeon_device *rdev)
5779 {
5780         WREG32(RLC_CNTL, RLC_ENABLE);
5781
5782         si_enable_gui_idle_interrupt(rdev, true);
5783
5784         udelay(50);
5785 }
5786
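/*
 * si_lbpw_supported - check whether RLC load balancing can be enabled
 *
 * Per the check below, LBPW is only used on boards with DDR3 memory: the
 * top nibble of MC_SEQ_MISC0 encodes the memory type and 0xB is DDR3.
 */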
5787 static bool si_lbpw_supported(struct radeon_device *rdev)
5788 {
5789         u32 tmp;
5790
5791         /* Enable LBPW only for DDR3 */
5792         tmp = RREG32(MC_SEQ_MISC0);
5793         if ((tmp & 0xF0000000) == 0xB0000000)
5794                 return true;
5795         return false;
5796 }
5797
5798 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5799 {
5800         u32 tmp;
5801
5802         tmp = RREG32(RLC_LB_CNTL);
5803         if (enable)
5804                 tmp |= LOAD_BALANCE_ENABLE;
5805         else
5806                 tmp &= ~LOAD_BALANCE_ENABLE;
5807         WREG32(RLC_LB_CNTL, tmp);
5808
5809         if (!enable) {
5810                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5811                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5812         }
5813 }
5814
5815 static int si_rlc_resume(struct radeon_device *rdev)
5816 {
5817         u32 i;
5818
5819         if (!rdev->rlc_fw)
5820                 return -EINVAL;
5821
5822         si_rlc_stop(rdev);
5823
5824         si_rlc_reset(rdev);
5825
5826         si_init_pg(rdev);
5827
5828         si_init_cg(rdev);
5829
5830         WREG32(RLC_RL_BASE, 0);
5831         WREG32(RLC_RL_SIZE, 0);
5832         WREG32(RLC_LB_CNTL, 0);
5833         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5834         WREG32(RLC_LB_CNTR_INIT, 0);
5835         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5836
5837         WREG32(RLC_MC_CNTL, 0);
5838         WREG32(RLC_UCODE_CNTL, 0);
5839
5840         if (rdev->new_fw) {
5841                 const struct rlc_firmware_header_v1_0 *hdr =
5842                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5843                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5844                 const __le32 *fw_data = (const __le32 *)
5845                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5846
5847                 radeon_ucode_print_rlc_hdr(&hdr->header);
5848
5849                 for (i = 0; i < fw_size; i++) {
5850                         WREG32(RLC_UCODE_ADDR, i);
5851                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5852                 }
5853         } else {
5854                 const __be32 *fw_data =
5855                         (const __be32 *)rdev->rlc_fw->data;
5856                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5857                         WREG32(RLC_UCODE_ADDR, i);
5858                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5859                 }
5860         }
5861         WREG32(RLC_UCODE_ADDR, 0);
5862
5863         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5864
5865         si_rlc_start(rdev);
5866
5867         return 0;
5868 }
5869
5870 static void si_enable_interrupts(struct radeon_device *rdev)
5871 {
5872         u32 ih_cntl = RREG32(IH_CNTL);
5873         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5874
5875         ih_cntl |= ENABLE_INTR;
5876         ih_rb_cntl |= IH_RB_ENABLE;
5877         WREG32(IH_CNTL, ih_cntl);
5878         WREG32(IH_RB_CNTL, ih_rb_cntl);
5879         rdev->ih.enabled = true;
5880 }
5881
5882 static void si_disable_interrupts(struct radeon_device *rdev)
5883 {
5884         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5885         u32 ih_cntl = RREG32(IH_CNTL);
5886
5887         ih_rb_cntl &= ~IH_RB_ENABLE;
5888         ih_cntl &= ~ENABLE_INTR;
5889         WREG32(IH_RB_CNTL, ih_rb_cntl);
5890         WREG32(IH_CNTL, ih_cntl);
5891         /* set rptr, wptr to 0 */
5892         WREG32(IH_RB_RPTR, 0);
5893         WREG32(IH_RB_WPTR, 0);
5894         rdev->ih.enabled = false;
5895         rdev->ih.rptr = 0;
5896 }
5897
5898 static void si_disable_interrupt_state(struct radeon_device *rdev)
5899 {
5900         u32 tmp;
5901
5902         tmp = RREG32(CP_INT_CNTL_RING0) &
5903                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5904         WREG32(CP_INT_CNTL_RING0, tmp);
5905         WREG32(CP_INT_CNTL_RING1, 0);
5906         WREG32(CP_INT_CNTL_RING2, 0);
5907         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5908         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5909         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5910         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5911         WREG32(GRBM_INT_CNTL, 0);
5912         if (rdev->num_crtc >= 2) {
5913                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5914                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5915         }
5916         if (rdev->num_crtc >= 4) {
5917                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5918                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5919         }
5920         if (rdev->num_crtc >= 6) {
5921                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5922                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5923         }
5924
5925         if (rdev->num_crtc >= 2) {
5926                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5927                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5928         }
5929         if (rdev->num_crtc >= 4) {
5930                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5931                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5932         }
5933         if (rdev->num_crtc >= 6) {
5934                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5935                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5936         }
5937
5938         if (!ASIC_IS_NODCE(rdev)) {
5939                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5940
5941                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5942                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5943                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5944                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5945                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5946                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5947                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5948                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5949                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5950                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5951                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5952                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5953         }
5954 }
5955
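/*
 * si_irq_init - bring up the interrupt handler (IH)
 *
 * Allocates the IH ring, loads and starts the RLC, programs the IH ring
 * buffer (base, size, writeback pointer) and IH_CNTL, masks every
 * interrupt source via si_disable_interrupt_state(), and finally enables
 * the IH.  Returns 0 on success or a negative error code.
 */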
5956 static int si_irq_init(struct radeon_device *rdev)
5957 {
5958         int ret = 0;
5959         int rb_bufsz;
5960         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5961
5962         /* allocate ring */
5963         ret = r600_ih_ring_alloc(rdev);
5964         if (ret)
5965                 return ret;
5966
5967         /* disable irqs */
5968         si_disable_interrupts(rdev);
5969
5970         /* init rlc */
5971         ret = si_rlc_resume(rdev);
5972         if (ret) {
5973                 r600_ih_ring_fini(rdev);
5974                 return ret;
5975         }
5976
5977         /* setup interrupt control */
5978         /* set dummy read address to ring address */
5979         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5980         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5981         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5982          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5983          */
5984         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5985         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5986         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5987         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5988
5989         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5990         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5991
5992         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5993                       IH_WPTR_OVERFLOW_CLEAR |
5994                       (rb_bufsz << 1));
5995
5996         if (rdev->wb.enabled)
5997                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5998
5999         /* set the writeback address whether it's enabled or not */
6000         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6001         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6002
6003         WREG32(IH_RB_CNTL, ih_rb_cntl);
6004
6005         /* set rptr, wptr to 0 */
6006         WREG32(IH_RB_RPTR, 0);
6007         WREG32(IH_RB_WPTR, 0);
6008
6009         /* Default settings for IH_CNTL (disabled at first) */
6010         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6011         /* RPTR_REARM only works if msi's are enabled */
6012         if (rdev->msi_enabled)
6013                 ih_cntl |= RPTR_REARM;
6014         WREG32(IH_CNTL, ih_cntl);
6015
6016         /* force the active interrupt state to all disabled */
6017         si_disable_interrupt_state(rdev);
6018
6019         pci_set_master(rdev->pdev);
6020
6021         /* enable irqs */
6022         si_enable_interrupts(rdev);
6023
6024         return ret;
6025 }
6026
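/*
 * si_irq_set - program the interrupt enable bits
 *
 * Builds the CP ring, DMA, vblank, page flip, hotplug and thermal enable
 * masks from the current rdev->irq state and writes them to the hardware.
 * Returns -EINVAL if no interrupt handler has been installed yet.
 */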
6027 int si_irq_set(struct radeon_device *rdev)
6028 {
6029         u32 cp_int_cntl;
6030         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6031         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6032         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6033         u32 grbm_int_cntl = 0;
6034         u32 dma_cntl, dma_cntl1;
6035         u32 thermal_int = 0;
6036
6037         if (!rdev->irq.installed) {
6038                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6039                 return -EINVAL;
6040         }
6041         /* don't enable anything if the ih is disabled */
6042         if (!rdev->ih.enabled) {
6043                 si_disable_interrupts(rdev);
6044                 /* force the active interrupt state to all disabled */
6045                 si_disable_interrupt_state(rdev);
6046                 return 0;
6047         }
6048
6049         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6050                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6051
6052         if (!ASIC_IS_NODCE(rdev)) {
6053                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6054                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6055                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6056                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6057                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6058                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6059         }
6060
6061         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6062         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6063
6064         thermal_int = RREG32(CG_THERMAL_INT) &
6065                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6066
6067         /* enable CP interrupts on all rings */
6068         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6069                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6070                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6071         }
6072         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6073                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6074                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6075         }
6076         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6077                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6078                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6079         }
6080         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6081                 DRM_DEBUG("si_irq_set: sw int dma\n");
6082                 dma_cntl |= TRAP_ENABLE;
6083         }
6084
6085         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6086                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6087                 dma_cntl1 |= TRAP_ENABLE;
6088         }
6089         if (rdev->irq.crtc_vblank_int[0] ||
6090             atomic_read(&rdev->irq.pflip[0])) {
6091                 DRM_DEBUG("si_irq_set: vblank 0\n");
6092                 crtc1 |= VBLANK_INT_MASK;
6093         }
6094         if (rdev->irq.crtc_vblank_int[1] ||
6095             atomic_read(&rdev->irq.pflip[1])) {
6096                 DRM_DEBUG("si_irq_set: vblank 1\n");
6097                 crtc2 |= VBLANK_INT_MASK;
6098         }
6099         if (rdev->irq.crtc_vblank_int[2] ||
6100             atomic_read(&rdev->irq.pflip[2])) {
6101                 DRM_DEBUG("si_irq_set: vblank 2\n");
6102                 crtc3 |= VBLANK_INT_MASK;
6103         }
6104         if (rdev->irq.crtc_vblank_int[3] ||
6105             atomic_read(&rdev->irq.pflip[3])) {
6106                 DRM_DEBUG("si_irq_set: vblank 3\n");
6107                 crtc4 |= VBLANK_INT_MASK;
6108         }
6109         if (rdev->irq.crtc_vblank_int[4] ||
6110             atomic_read(&rdev->irq.pflip[4])) {
6111                 DRM_DEBUG("si_irq_set: vblank 4\n");
6112                 crtc5 |= VBLANK_INT_MASK;
6113         }
6114         if (rdev->irq.crtc_vblank_int[5] ||
6115             atomic_read(&rdev->irq.pflip[5])) {
6116                 DRM_DEBUG("si_irq_set: vblank 5\n");
6117                 crtc6 |= VBLANK_INT_MASK;
6118         }
6119         if (rdev->irq.hpd[0]) {
6120                 DRM_DEBUG("si_irq_set: hpd 1\n");
6121                 hpd1 |= DC_HPDx_INT_EN;
6122         }
6123         if (rdev->irq.hpd[1]) {
6124                 DRM_DEBUG("si_irq_set: hpd 2\n");
6125                 hpd2 |= DC_HPDx_INT_EN;
6126         }
6127         if (rdev->irq.hpd[2]) {
6128                 DRM_DEBUG("si_irq_set: hpd 3\n");
6129                 hpd3 |= DC_HPDx_INT_EN;
6130         }
6131         if (rdev->irq.hpd[3]) {
6132                 DRM_DEBUG("si_irq_set: hpd 4\n");
6133                 hpd4 |= DC_HPDx_INT_EN;
6134         }
6135         if (rdev->irq.hpd[4]) {
6136                 DRM_DEBUG("si_irq_set: hpd 5\n");
6137                 hpd5 |= DC_HPDx_INT_EN;
6138         }
6139         if (rdev->irq.hpd[5]) {
6140                 DRM_DEBUG("si_irq_set: hpd 6\n");
6141                 hpd6 |= DC_HPDx_INT_EN;
6142         }
6143
6144         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6145         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6146         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6147
6148         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6149         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6150
6151         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6152
6153         if (rdev->irq.dpm_thermal) {
6154                 DRM_DEBUG("dpm thermal\n");
6155                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6156         }
6157
6158         if (rdev->num_crtc >= 2) {
6159                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6160                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6161         }
6162         if (rdev->num_crtc >= 4) {
6163                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6164                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6165         }
6166         if (rdev->num_crtc >= 6) {
6167                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6168                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6169         }
6170
6171         if (rdev->num_crtc >= 2) {
6172                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6173                        GRPH_PFLIP_INT_MASK);
6174                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6175                        GRPH_PFLIP_INT_MASK);
6176         }
6177         if (rdev->num_crtc >= 4) {
6178                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6179                        GRPH_PFLIP_INT_MASK);
6180                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6181                        GRPH_PFLIP_INT_MASK);
6182         }
6183         if (rdev->num_crtc >= 6) {
6184                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6185                        GRPH_PFLIP_INT_MASK);
6186                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6187                        GRPH_PFLIP_INT_MASK);
6188         }
6189
6190         if (!ASIC_IS_NODCE(rdev)) {
6191                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6192                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6193                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6194                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6195                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6196                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6197         }
6198
6199         WREG32(CG_THERMAL_INT, thermal_int);
6200
6201         return 0;
6202 }
6203
6204 static inline void si_irq_ack(struct radeon_device *rdev)
6205 {
6206         u32 tmp;
6207
6208         if (ASIC_IS_NODCE(rdev))
6209                 return;
6210
6211         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6212         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6213         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6214         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6215         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6216         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6217         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6218         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6219         if (rdev->num_crtc >= 4) {
6220                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6221                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6222         }
6223         if (rdev->num_crtc >= 6) {
6224                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6225                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6226         }
6227
6228         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6229                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6230         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6231                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6232         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6233                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6234         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6235                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6236         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6237                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6238         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6239                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6240
6241         if (rdev->num_crtc >= 4) {
6242                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6243                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6244                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6245                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6246                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6247                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6248                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6249                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6250                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6251                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6252                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6253                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6254         }
6255
6256         if (rdev->num_crtc >= 6) {
6257                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6258                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6259                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6260                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6261                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6262                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6263                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6264                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6265                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6266                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6267                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6268                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6269         }
6270
6271         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6272                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6273                 tmp |= DC_HPDx_INT_ACK;
6274                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6275         }
6276         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6277                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6278                 tmp |= DC_HPDx_INT_ACK;
6279                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6280         }
6281         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6282                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6283                 tmp |= DC_HPDx_INT_ACK;
6284                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6285         }
6286         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6287                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6288                 tmp |= DC_HPDx_INT_ACK;
6289                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6290         }
6291         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6292                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6293                 tmp |= DC_HPDx_INT_ACK;
6294                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6295         }
6296         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6297                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6298                 tmp |= DC_HPDx_INT_ACK;
6299                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6300         }
6301 }
6302
6303 static void si_irq_disable(struct radeon_device *rdev)
6304 {
6305         si_disable_interrupts(rdev);
6306         /* Wait and acknowledge irq */
6307         mdelay(1);
6308         si_irq_ack(rdev);
6309         si_disable_interrupt_state(rdev);
6310 }
6311
6312 static void si_irq_suspend(struct radeon_device *rdev)
6313 {
6314         si_irq_disable(rdev);
6315         si_rlc_stop(rdev);
6316 }
6317
6318 static void si_irq_fini(struct radeon_device *rdev)
6319 {
6320         si_irq_suspend(rdev);
6321         r600_ih_ring_fini(rdev);
6322 }
6323
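/*
 * si_get_ih_wptr - fetch the current IH ring write pointer
 *
 * Uses the writeback copy of the pointer when writeback is enabled,
 * otherwise reads IH_RB_WPTR directly.  On a ring overflow the read
 * pointer is resynchronized just past the overwritten entries and the
 * overflow flag is cleared.
 */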
6324 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6325 {
6326         u32 wptr, tmp;
6327
6328         if (rdev->wb.enabled)
6329                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6330         else
6331                 wptr = RREG32(IH_RB_WPTR);
6332
6333         if (wptr & RB_OVERFLOW) {
6334                 wptr &= ~RB_OVERFLOW;
6335                 /* When a ring buffer overflow happens, start parsing interrupts
6336                  * from the last vector that was not overwritten (wptr + 16).
6337                  * Hopefully this allows us to catch up.
6338                  */
6339                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6340                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6341                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6342                 tmp = RREG32(IH_RB_CNTL);
6343                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6344                 WREG32(IH_RB_CNTL, tmp);
6345         }
6346         return (wptr & rdev->ih.ptr_mask);
6347 }
6348
6349 /* SI IV Ring
6350  * Each IV ring entry is 128 bits:
6351  * [7:0]    - interrupt source id
6352  * [31:8]   - reserved
6353  * [59:32]  - interrupt source data
6354  * [63:60]  - reserved
6355  * [71:64]  - RINGID
6356  * [79:72]  - VMID
6357  * [127:80] - reserved
6358  */
6359 int si_irq_process(struct radeon_device *rdev)
6360 {
6361         u32 wptr;
6362         u32 rptr;
6363         u32 src_id, src_data, ring_id;
6364         u32 ring_index;
6365         bool queue_hotplug = false;
6366         bool queue_thermal = false;
6367         u32 status, addr;
6368
6369         if (!rdev->ih.enabled || rdev->shutdown)
6370                 return IRQ_NONE;
6371
6372         wptr = si_get_ih_wptr(rdev);
6373
6374 restart_ih:
6375         /* is somebody else already processing irqs? */
6376         if (atomic_xchg(&rdev->ih.lock, 1))
6377                 return IRQ_NONE;
6378
6379         rptr = rdev->ih.rptr;
6380         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6381
6382         /* Order reading of wptr vs. reading of IH ring data */
6383         rmb();
6384
6385         /* display interrupts */
6386         si_irq_ack(rdev);
6387
6388         while (rptr != wptr) {
6389                 /* wptr/rptr are in bytes! */
6390                 ring_index = rptr / 4;
6391                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6392                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6393                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6394
6395                 switch (src_id) {
6396                 case 1: /* D1 vblank/vline */
6397                         switch (src_data) {
6398                         case 0: /* D1 vblank */
6399                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6400                                         if (rdev->irq.crtc_vblank_int[0]) {
6401                                                 drm_handle_vblank(rdev->ddev, 0);
6402                                                 rdev->pm.vblank_sync = true;
6403                                                 wake_up(&rdev->irq.vblank_queue);
6404                                         }
6405                                         if (atomic_read(&rdev->irq.pflip[0]))
6406                                                 radeon_crtc_handle_vblank(rdev, 0);
6407                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6408                                         DRM_DEBUG("IH: D1 vblank\n");
6409                                 }
6410                                 break;
6411                         case 1: /* D1 vline */
6412                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6413                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6414                                         DRM_DEBUG("IH: D1 vline\n");
6415                                 }
6416                                 break;
6417                         default:
6418                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6419                                 break;
6420                         }
6421                         break;
6422                 case 2: /* D2 vblank/vline */
6423                         switch (src_data) {
6424                         case 0: /* D2 vblank */
6425                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6426                                         if (rdev->irq.crtc_vblank_int[1]) {
6427                                                 drm_handle_vblank(rdev->ddev, 1);
6428                                                 rdev->pm.vblank_sync = true;
6429                                                 wake_up(&rdev->irq.vblank_queue);
6430                                         }
6431                                         if (atomic_read(&rdev->irq.pflip[1]))
6432                                                 radeon_crtc_handle_vblank(rdev, 1);
6433                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6434                                         DRM_DEBUG("IH: D2 vblank\n");
6435                                 }
6436                                 break;
6437                         case 1: /* D2 vline */
6438                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6439                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6440                                         DRM_DEBUG("IH: D2 vline\n");
6441                                 }
6442                                 break;
6443                         default:
6444                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6445                                 break;
6446                         }
6447                         break;
6448                 case 3: /* D3 vblank/vline */
6449                         switch (src_data) {
6450                         case 0: /* D3 vblank */
6451                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6452                                         if (rdev->irq.crtc_vblank_int[2]) {
6453                                                 drm_handle_vblank(rdev->ddev, 2);
6454                                                 rdev->pm.vblank_sync = true;
6455                                                 wake_up(&rdev->irq.vblank_queue);
6456                                         }
6457                                         if (atomic_read(&rdev->irq.pflip[2]))
6458                                                 radeon_crtc_handle_vblank(rdev, 2);
6459                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6460                                         DRM_DEBUG("IH: D3 vblank\n");
6461                                 }
6462                                 break;
6463                         case 1: /* D3 vline */
6464                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6465                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6466                                         DRM_DEBUG("IH: D3 vline\n");
6467                                 }
6468                                 break;
6469                         default:
6470                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6471                                 break;
6472                         }
6473                         break;
6474                 case 4: /* D4 vblank/vline */
6475                         switch (src_data) {
6476                         case 0: /* D4 vblank */
6477                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6478                                         if (rdev->irq.crtc_vblank_int[3]) {
6479                                                 drm_handle_vblank(rdev->ddev, 3);
6480                                                 rdev->pm.vblank_sync = true;
6481                                                 wake_up(&rdev->irq.vblank_queue);
6482                                         }
6483                                         if (atomic_read(&rdev->irq.pflip[3]))
6484                                                 radeon_crtc_handle_vblank(rdev, 3);
6485                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6486                                         DRM_DEBUG("IH: D4 vblank\n");
6487                                 }
6488                                 break;
6489                         case 1: /* D4 vline */
6490                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6491                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6492                                         DRM_DEBUG("IH: D4 vline\n");
6493                                 }
6494                                 break;
6495                         default:
6496                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6497                                 break;
6498                         }
6499                         break;
6500                 case 5: /* D5 vblank/vline */
6501                         switch (src_data) {
6502                         case 0: /* D5 vblank */
6503                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6504                                         if (rdev->irq.crtc_vblank_int[4]) {
6505                                                 drm_handle_vblank(rdev->ddev, 4);
6506                                                 rdev->pm.vblank_sync = true;
6507                                                 wake_up(&rdev->irq.vblank_queue);
6508                                         }
6509                                         if (atomic_read(&rdev->irq.pflip[4]))
6510                                                 radeon_crtc_handle_vblank(rdev, 4);
6511                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6512                                         DRM_DEBUG("IH: D5 vblank\n");
6513                                 }
6514                                 break;
6515                         case 1: /* D5 vline */
6516                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6517                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6518                                         DRM_DEBUG("IH: D5 vline\n");
6519                                 }
6520                                 break;
6521                         default:
6522                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6523                                 break;
6524                         }
6525                         break;
6526                 case 6: /* D6 vblank/vline */
6527                         switch (src_data) {
6528                         case 0: /* D6 vblank */
6529                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6530                                         if (rdev->irq.crtc_vblank_int[5]) {
6531                                                 drm_handle_vblank(rdev->ddev, 5);
6532                                                 rdev->pm.vblank_sync = true;
6533                                                 wake_up(&rdev->irq.vblank_queue);
6534                                         }
6535                                         if (atomic_read(&rdev->irq.pflip[5]))
6536                                                 radeon_crtc_handle_vblank(rdev, 5);
6537                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6538                                         DRM_DEBUG("IH: D6 vblank\n");
6539                                 }
6540                                 break;
6541                         case 1: /* D6 vline */
6542                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6543                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6544                                         DRM_DEBUG("IH: D6 vline\n");
6545                                 }
6546                                 break;
6547                         default:
6548                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6549                                 break;
6550                         }
6551                         break;
6552                 case 8: /* D1 page flip */
6553                 case 10: /* D2 page flip */
6554                 case 12: /* D3 page flip */
6555                 case 14: /* D4 page flip */
6556                 case 16: /* D5 page flip */
6557                 case 18: /* D6 page flip */
6558                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6559                         if (radeon_use_pflipirq > 0)
6560                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6561                         break;
6562                 case 42: /* HPD hotplug */
6563                         switch (src_data) {
6564                         case 0:
6565                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6566                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6567                                         queue_hotplug = true;
6568                                         DRM_DEBUG("IH: HPD1\n");
6569                                 }
6570                                 break;
6571                         case 1:
6572                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6573                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6574                                         queue_hotplug = true;
6575                                         DRM_DEBUG("IH: HPD2\n");
6576                                 }
6577                                 break;
6578                         case 2:
6579                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6580                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6581                                         queue_hotplug = true;
6582                                         DRM_DEBUG("IH: HPD3\n");
6583                                 }
6584                                 break;
6585                         case 3:
6586                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6587                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6588                                         queue_hotplug = true;
6589                                         DRM_DEBUG("IH: HPD4\n");
6590                                 }
6591                                 break;
6592                         case 4:
6593                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6594                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6595                                         queue_hotplug = true;
6596                                         DRM_DEBUG("IH: HPD5\n");
6597                                 }
6598                                 break;
6599                         case 5:
6600                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6601                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6602                                         queue_hotplug = true;
6603                                         DRM_DEBUG("IH: HPD6\n");
6604                                 }
6605                                 break;
6606                         default:
6607                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6608                                 break;
6609                         }
6610                         break;
6611                 case 124: /* UVD */
6612                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6613                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6614                         break;
6615                 case 146:
6616                 case 147:
6617                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6618                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6619                         /* reset addr and status */
6620                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6621                         if (addr == 0x0 && status == 0x0)
6622                                 break;
6623                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6624                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6625                                 addr);
6626                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6627                                 status);
6628                         si_vm_decode_fault(rdev, status, addr);
6629                         break;
6630                 case 176: /* RINGID0 CP_INT */
6631                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6632                         break;
6633                 case 177: /* RINGID1 CP_INT */
6634                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6635                         break;
6636                 case 178: /* RINGID2 CP_INT */
6637                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6638                         break;
6639                 case 181: /* CP EOP event */
6640                         DRM_DEBUG("IH: CP EOP\n");
6641                         switch (ring_id) {
6642                         case 0:
6643                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6644                                 break;
6645                         case 1:
6646                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6647                                 break;
6648                         case 2:
6649                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6650                                 break;
6651                         }
6652                         break;
6653                 case 224: /* DMA trap event */
6654                         DRM_DEBUG("IH: DMA trap\n");
6655                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6656                         break;
6657                 case 230: /* thermal low to high */
6658                         DRM_DEBUG("IH: thermal low to high\n");
6659                         rdev->pm.dpm.thermal.high_to_low = false;
6660                         queue_thermal = true;
6661                         break;
6662                 case 231: /* thermal high to low */
6663                         DRM_DEBUG("IH: thermal high to low\n");
6664                         rdev->pm.dpm.thermal.high_to_low = true;
6665                         queue_thermal = true;
6666                         break;
6667                 case 233: /* GUI IDLE */
6668                         DRM_DEBUG("IH: GUI idle\n");
6669                         break;
6670                 case 244: /* DMA1 trap event */
6671                         DRM_DEBUG("IH: DMA1 trap\n");
6672                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6673                         break;
6674                 default:
6675                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6676                         break;
6677                 }
6678
6679                 /* wptr/rptr are in bytes! */
6680                 rptr += 16;
6681                 rptr &= rdev->ih.ptr_mask;
6682                 WREG32(IH_RB_RPTR, rptr);
6683         }
6684         if (queue_hotplug)
6685                 schedule_work(&rdev->hotplug_work);
6686         if (queue_thermal && rdev->pm.dpm_enabled)
6687                 schedule_work(&rdev->pm.dpm.thermal.work);
6688         rdev->ih.rptr = rptr;
6689         atomic_set(&rdev->ih.lock, 0);
6690
6691         /* make sure wptr hasn't changed while processing */
6692         wptr = si_get_ih_wptr(rdev);
6693         if (wptr != rptr)
6694                 goto restart_ih;
6695
6696         return IRQ_HANDLED;
6697 }
6698
6699 /*
6700  * startup/shutdown callbacks
6701  */
6702 static int si_startup(struct radeon_device *rdev)
6703 {
6704         struct radeon_ring *ring;
6705         int r;
6706
6707         /* enable pcie gen2/3 link */
6708         si_pcie_gen3_enable(rdev);
6709         /* enable aspm */
6710         si_program_aspm(rdev);
6711
6712         /* scratch needs to be initialized before MC */
6713         r = r600_vram_scratch_init(rdev);
6714         if (r)
6715                 return r;
6716
6717         si_mc_program(rdev);
6718
6719         if (!rdev->pm.dpm_enabled) {
6720                 r = si_mc_load_microcode(rdev);
6721                 if (r) {
6722                         DRM_ERROR("Failed to load MC firmware!\n");
6723                         return r;
6724                 }
6725         }
6726
6727         r = si_pcie_gart_enable(rdev);
6728         if (r)
6729                 return r;
6730         si_gpu_init(rdev);
6731
6732         /* allocate rlc buffers */
6733         if (rdev->family == CHIP_VERDE) {
6734                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6735                 rdev->rlc.reg_list_size =
6736                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6737         }
6738         rdev->rlc.cs_data = si_cs_data;
6739         r = sumo_rlc_init(rdev);
6740         if (r) {
6741                 DRM_ERROR("Failed to init rlc BOs!\n");
6742                 return r;
6743         }
6744
6745         /* allocate wb buffer */
6746         r = radeon_wb_init(rdev);
6747         if (r)
6748                 return r;
6749
6750         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6751         if (r) {
6752                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6753                 return r;
6754         }
6755
6756         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6757         if (r) {
6758                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6759                 return r;
6760         }
6761
6762         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6763         if (r) {
6764                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6765                 return r;
6766         }
6767
6768         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6769         if (r) {
6770                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6771                 return r;
6772         }
6773
6774         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6775         if (r) {
6776                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6777                 return r;
6778         }
6779
6780         if (rdev->has_uvd) {
6781                 r = uvd_v2_2_resume(rdev);
6782                 if (!r) {
6783                         r = radeon_fence_driver_start_ring(rdev,
6784                                                            R600_RING_TYPE_UVD_INDEX);
6785                         if (r)
6786                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6787                 }
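                     /* On any failure above, zero the ring size so the UVD
                      * ring and engine init later in si_startup() is skipped.
                      */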
6788                 if (r)
6789                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6790         }
6791
6792         /* Enable IRQ */
6793         if (!rdev->irq.installed) {
6794                 r = radeon_irq_kms_init(rdev);
6795                 if (r)
6796                         return r;
6797         }
6798
6799         r = si_irq_init(rdev);
6800         if (r) {
6801                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6802                 radeon_irq_kms_fini(rdev);
6803                 return r;
6804         }
6805         si_irq_set(rdev);
6806
6807         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6808         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6809                              RADEON_CP_PACKET2);
6810         if (r)
6811                 return r;
6812
6813         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6814         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6815                              RADEON_CP_PACKET2);
6816         if (r)
6817                 return r;
6818
6819         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6820         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6821                              RADEON_CP_PACKET2);
6822         if (r)
6823                 return r;
6824
6825         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6826         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6827                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6828         if (r)
6829                 return r;
6830
6831         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6832         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6833                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6834         if (r)
6835                 return r;
6836
6837         r = si_cp_load_microcode(rdev);
6838         if (r)
6839                 return r;
6840         r = si_cp_resume(rdev);
6841         if (r)
6842                 return r;
6843
6844         r = cayman_dma_resume(rdev);
6845         if (r)
6846                 return r;
6847
6848         if (rdev->has_uvd) {
6849                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6850                 if (ring->ring_size) {
6851                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6852                                              RADEON_CP_PACKET2);
6853                         if (!r)
6854                                 r = uvd_v1_0_init(rdev);
6855                         if (r)
6856                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6857                 }
6858         }
6859
6860         r = radeon_ib_pool_init(rdev);
6861         if (r) {
6862                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6863                 return r;
6864         }
6865
6866         r = radeon_vm_manager_init(rdev);
6867         if (r) {
6868                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6869                 return r;
6870         }
6871
6872         r = dce6_audio_init(rdev);
6873         if (r)
6874                 return r;
6875
6876         return 0;
6877 }
6878
6879 int si_resume(struct radeon_device *rdev)
6880 {
6881         int r;
6882
6883         /* Do not reset the GPU before posting: on rv770 hw, unlike on
6884          * r500 hw, posting will perform the necessary tasks to bring the
6885          * GPU back into good shape.
6886          */
6887         /* post card */
6888         atom_asic_init(rdev->mode_info.atom_context);
6889
6890         /* init golden registers */
6891         si_init_golden_registers(rdev);
6892
6893         if (rdev->pm.pm_method == PM_METHOD_DPM)
6894                 radeon_pm_resume(rdev);
6895
6896         rdev->accel_working = true;
6897         r = si_startup(rdev);
6898         if (r) {
6899                 DRM_ERROR("si startup failed on resume\n");
6900                 rdev->accel_working = false;
6901                 return r;
6902         }
6903
6904         return r;
6905
6906 }
6907
6908 int si_suspend(struct radeon_device *rdev)
6909 {
6910         radeon_pm_suspend(rdev);
6911         dce6_audio_fini(rdev);
6912         radeon_vm_manager_fini(rdev);
6913         si_cp_enable(rdev, false);
6914         cayman_dma_stop(rdev);
6915         if (rdev->has_uvd) {
6916                 uvd_v1_0_fini(rdev);
6917                 radeon_uvd_suspend(rdev);
6918         }
6919         si_fini_pg(rdev);
6920         si_fini_cg(rdev);
6921         si_irq_suspend(rdev);
6922         radeon_wb_disable(rdev);
6923         si_pcie_gart_disable(rdev);
6924         return 0;
6925 }
6926
6927 /* The plan is to move initialization into this function and use helper
6928  * functions so that radeon_device_init does little more than call
6929  * asic-specific functions. This should also allow us to remove a
6930  * bunch of callback functions
6931  * like vram_info.
6932  */
6933 int si_init(struct radeon_device *rdev)
6934 {
6935         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6936         int r;
6937
6938         /* Read BIOS */
6939         if (!radeon_get_bios(rdev)) {
6940                 if (ASIC_IS_AVIVO(rdev))
6941                         return -EINVAL;
6942         }
6943         /* Must be an ATOMBIOS */
6944         if (!rdev->is_atom_bios) {
6945                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6946                 return -EINVAL;
6947         }
6948         r = radeon_atombios_init(rdev);
6949         if (r)
6950                 return r;
6951
6952         /* Post card if necessary */
6953         if (!radeon_card_posted(rdev)) {
6954                 if (!rdev->bios) {
6955                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6956                         return -EINVAL;
6957                 }
6958                 DRM_INFO("GPU not posted. posting now...\n");
6959                 atom_asic_init(rdev->mode_info.atom_context);
6960         }
6961         /* init golden registers */
6962         si_init_golden_registers(rdev);
6963         /* Initialize scratch registers */
6964         si_scratch_init(rdev);
6965         /* Initialize surface registers */
6966         radeon_surface_init(rdev);
6967         /* Initialize clocks */
6968         radeon_get_clock_info(rdev->ddev);
6969
6970         /* Fence driver */
6971         r = radeon_fence_driver_init(rdev);
6972         if (r)
6973                 return r;
6974
6975         /* initialize memory controller */
6976         r = si_mc_init(rdev);
6977         if (r)
6978                 return r;
6979         /* Memory manager */
6980         r = radeon_bo_init(rdev);
6981         if (r)
6982                 return r;
6983
6984         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6985             !rdev->rlc_fw || !rdev->mc_fw) {
6986                 r = si_init_microcode(rdev);
6987                 if (r) {
6988                         DRM_ERROR("Failed to load firmware!\n");
6989                         return r;
6990                 }
6991         }
6992
6993         /* Initialize power management */
6994         radeon_pm_init(rdev);
6995
6996         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6997         ring->ring_obj = NULL;
6998         r600_ring_init(rdev, ring, 1024 * 1024);
6999
7000         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7001         ring->ring_obj = NULL;
7002         r600_ring_init(rdev, ring, 1024 * 1024);
7003
7004         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7005         ring->ring_obj = NULL;
7006         r600_ring_init(rdev, ring, 1024 * 1024);
7007
7008         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7009         ring->ring_obj = NULL;
7010         r600_ring_init(rdev, ring, 64 * 1024);
7011
7012         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7013         ring->ring_obj = NULL;
7014         r600_ring_init(rdev, ring, 64 * 1024);
7015
7016         if (rdev->has_uvd) {
7017                 r = radeon_uvd_init(rdev);
7018                 if (!r) {
7019                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7020                         ring->ring_obj = NULL;
7021                         r600_ring_init(rdev, ring, 4096);
7022                 }
7023         }
7024
7025         rdev->ih.ring_obj = NULL;
7026         r600_ih_ring_init(rdev, 64 * 1024);
7027
7028         r = r600_pcie_gart_init(rdev);
7029         if (r)
7030                 return r;
7031
7032         rdev->accel_working = true;
7033         r = si_startup(rdev);
7034         if (r) {
7035                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7036                 si_cp_fini(rdev);
7037                 cayman_dma_fini(rdev);
7038                 si_irq_fini(rdev);
7039                 sumo_rlc_fini(rdev);
7040                 radeon_wb_fini(rdev);
7041                 radeon_ib_pool_fini(rdev);
7042                 radeon_vm_manager_fini(rdev);
7043                 radeon_irq_kms_fini(rdev);
7044                 si_pcie_gart_fini(rdev);
7045                 rdev->accel_working = false;
7046         }
7047
7048         /* Don't start up if the MC ucode is missing.
7049          * The default clocks and voltages before the MC ucode
7050          * is loaded are not sufficient for advanced operations.
7051          */
7052         if (!rdev->mc_fw) {
7053                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7054                 return -EINVAL;
7055         }
7056
7057         return 0;
7058 }
7059
7060 void si_fini(struct radeon_device *rdev)
7061 {
7062         radeon_pm_fini(rdev);
7063         si_cp_fini(rdev);
7064         cayman_dma_fini(rdev);
7065         si_fini_pg(rdev);
7066         si_fini_cg(rdev);
7067         si_irq_fini(rdev);
7068         sumo_rlc_fini(rdev);
7069         radeon_wb_fini(rdev);
7070         radeon_vm_manager_fini(rdev);
7071         radeon_ib_pool_fini(rdev);
7072         radeon_irq_kms_fini(rdev);
7073         if (rdev->has_uvd) {
7074                 uvd_v1_0_fini(rdev);
7075                 radeon_uvd_fini(rdev);
7076         }
7077         si_pcie_gart_fini(rdev);
7078         r600_vram_scratch_fini(rdev);
7079         radeon_gem_fini(rdev);
7080         radeon_fence_driver_fini(rdev);
7081         radeon_bo_fini(rdev);
7082         radeon_atombios_fini(rdev);
7083         kfree(rdev->bios);
7084         rdev->bios = NULL;
7085 }
7086
7087 /**
7088  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7089  *
7090  * @rdev: radeon_device pointer
7091  *
7092  * Fetches a GPU clock counter snapshot (SI).
7093  * Returns the 64 bit clock counter snapshot.
7094  */
7095 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7096 {
7097         uint64_t clock;
7098
7099         mutex_lock(&rdev->gpu_clock_mutex);
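             /* Writing 1 below requests a capture of the GPU clock counter
              * so that the LSB/MSB reads form one consistent 64-bit
              * snapshot; the mutex serializes concurrent captures.
              */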
7100         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7101         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7102                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7103         mutex_unlock(&rdev->gpu_clock_mutex);
7104         return clock;
7105 }
7106
7107 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7108 {
7109         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7110         int r;
7111
7112         /* bypass vclk and dclk with bclk */
7113         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7114                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7115                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7116
7117         /* put PLL in bypass mode */
7118         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7119
7120         if (!vclk || !dclk) {
7121                 /* keep the Bypass mode, put PLL to sleep */
7122                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7123                 return 0;
7124         }
7125
7126         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7127                                           16384, 0x03FFFFFF, 0, 128, 5,
7128                                           &fb_div, &vclk_div, &dclk_div);
7129         if (r)
7130                 return r;
7131
7132         /* set RESET_ANTI_MUX to 0 */
7133         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7134
7135         /* set VCO_MODE to 1 */
7136         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7137
7138         /* toggle UPLL_SLEEP to 1 then back to 0 */
7139         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
7140         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7141
7142         /* deassert UPLL_RESET */
7143         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7144
7145         mdelay(1);
7146
7147         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7148         if (r)
7149                 return r;
7150
7151         /* assert UPLL_RESET again */
7152         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7153
7154         /* disable spread spectrum. */
7155         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7156
7157         /* set feedback divider */
7158         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7159
7160         /* set ref divider to 0 */
7161         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7162
7163         if (fb_div < 307200)
7164                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7165         else
7166                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7167
7168         /* set PDIV_A and PDIV_B */
7169         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7170                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7171                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7172
7173         /* give the PLL some time to settle */
7174         mdelay(15);
7175
7176         /* deassert PLL_RESET */
7177         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7178
7179         mdelay(15);
7180
7181         /* switch from bypass mode to normal mode */
7182         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7183
7184         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7185         if (r)
7186                 return r;
7187
7188         /* switch VCLK and DCLK selection */
7189         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7190                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7191                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7192
7193         mdelay(100);
7194
7195         return 0;
7196 }
7197
7198 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7199 {
7200         struct pci_dev *root = rdev->pdev->bus->self;
7201         int bridge_pos, gpu_pos;
7202         u32 speed_cntl, mask, current_data_rate;
7203         int ret, i;
7204         u16 tmp16;
7205
7206         if (pci_is_root_bus(rdev->pdev->bus))
7207                 return;
7208
7209         if (radeon_pcie_gen2 == 0)
7210                 return;
7211
7212         if (rdev->flags & RADEON_IS_IGP)
7213                 return;
7214
7215         if (!(rdev->flags & RADEON_IS_PCIE))
7216                 return;
7217
7218         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7219         if (ret != 0)
7220                 return;
7221
7222         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7223                 return;
7224
7225         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7226         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7227                 LC_CURRENT_DATA_RATE_SHIFT;
7228         if (mask & DRM_PCIE_SPEED_80) {
7229                 if (current_data_rate == 2) {
7230                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7231                         return;
7232                 }
7233                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7234         } else if (mask & DRM_PCIE_SPEED_50) {
7235                 if (current_data_rate == 1) {
7236                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7237                         return;
7238                 }
7239                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7240         }
7241
7242         bridge_pos = pci_pcie_cap(root);
7243         if (!bridge_pos)
7244                 return;
7245
7246         gpu_pos = pci_pcie_cap(rdev->pdev);
7247         if (!gpu_pos)
7248                 return;
7249
7250         if (mask & DRM_PCIE_SPEED_80) {
7251                 /* re-try equalization if gen3 is not already enabled */
7252                 if (current_data_rate != 2) {
7253                         u16 bridge_cfg, gpu_cfg;
7254                         u16 bridge_cfg2, gpu_cfg2;
7255                         u32 max_lw, current_lw, tmp;
7256
7257                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7258                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7259
7260                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7261                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7262
7263                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7264                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7265
7266                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7267                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7268                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7269
7270                         if (current_lw < max_lw) {
7271                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7272                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7273                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7274                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7275                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7276                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7277                                 }
7278                         }
7279
7280                         for (i = 0; i < 10; i++) {
7281                                 /* check status */
7282                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7283                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7284                                         break;
7285
7286                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7287                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7288
7289                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7290                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7291
7292                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7293                                 tmp |= LC_SET_QUIESCE;
7294                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7295
7296                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7297                                 tmp |= LC_REDO_EQ;
7298                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7299
7300                                 mdelay(100);
7301
7302                                 /* linkctl */
7303                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7304                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7305                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7306                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7307
7308                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7309                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7310                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7311                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7312
7313                                 /* linkctl2 */
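                                     /* In LNKCTL2, bit 4 is Enter Compliance
                                      * and bits [11:9] are Transmit Margin;
                                      * carry over the values read at the top
                                      * of this retry loop.
                                      */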
7314                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7315                                 tmp16 &= ~((1 << 4) | (7 << 9));
7316                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7317                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7318
7319                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7320                                 tmp16 &= ~((1 << 4) | (7 << 9));
7321                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7322                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7323
7324                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7325                                 tmp &= ~LC_SET_QUIESCE;
7326                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7327                         }
7328                 }
7329         }
7330
7331         /* set the link speed */
7332         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7333         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7334         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7335
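             /* Program the Target Link Speed field (LNKCTL2 bits [3:0]):
              * 1 = 2.5GT/s (gen1), 2 = 5.0GT/s (gen2), 3 = 8.0GT/s (gen3).
              */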
7336         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7337         tmp16 &= ~0xf;
7338         if (mask & DRM_PCIE_SPEED_80)
7339                 tmp16 |= 3; /* gen3 */
7340         else if (mask & DRM_PCIE_SPEED_50)
7341                 tmp16 |= 2; /* gen2 */
7342         else
7343                 tmp16 |= 1; /* gen1 */
7344         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7345
7346         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7347         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7348         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7349
7350         for (i = 0; i < rdev->usec_timeout; i++) {
7351                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7352                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7353                         break;
7354                 udelay(1);
7355         }
7356 }
7357
7358 static void si_program_aspm(struct radeon_device *rdev)
7359 {
7360         u32 data, orig;
7361         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7362         bool disable_clkreq = false;
7363
7364         if (radeon_aspm == 0)
7365                 return;
7366
7367         if (!(rdev->flags & RADEON_IS_PCIE))
7368                 return;
7369
7370         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7371         data &= ~LC_XMIT_N_FTS_MASK;
7372         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7373         if (orig != data)
7374                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7375
7376         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7377         data |= LC_GO_TO_RECOVERY;
7378         if (orig != data)
7379                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7380
7381         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7382         data |= P_IGNORE_EDB_ERR;
7383         if (orig != data)
7384                 WREG32_PCIE(PCIE_P_CNTL, data);
7385
7386         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7387         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7388         data |= LC_PMI_TO_L1_DIS;
7389         if (!disable_l0s)
7390                 data |= LC_L0S_INACTIVITY(7);
7391
7392         if (!disable_l1) {
7393                 data |= LC_L1_INACTIVITY(7);
7394                 data &= ~LC_PMI_TO_L1_DIS;
7395                 if (orig != data)
7396                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7397
7398                 if (!disable_plloff_in_l1) {
7399                         bool clk_req_support;
7400
7401                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7402                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7403                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7404                         if (orig != data)
7405                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7406
7407                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7408                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7409                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7410                         if (orig != data)
7411                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7412
7413                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7414                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7415                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7416                         if (orig != data)
7417                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7418
7419                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7420                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7421                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7422                         if (orig != data)
7423                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7424
7425                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7426                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7427                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7428                                 if (orig != data)
7429                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7430
7431                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7432                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7433                                 if (orig != data)
7434                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7435
7436                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7437                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7438                                 if (orig != data)
7439                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7440
7441                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7442                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7443                                 if (orig != data)
7444                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7445
7446                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7447                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7448                                 if (orig != data)
7449                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7450
7451                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7452                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7453                                 if (orig != data)
7454                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7455
7456                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7457                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7458                                 if (orig != data)
7459                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7460
7461                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7462                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7463                                 if (orig != data)
7464                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7465                         }
7466                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7467                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7468                         data |= LC_DYN_LANES_PWR_STATE(3);
7469                         if (orig != data)
7470                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7471
7472                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7473                         data &= ~LS2_EXIT_TIME_MASK;
7474                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7475                                 data |= LS2_EXIT_TIME(5);
7476                         if (orig != data)
7477                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7478
7479                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7480                         data &= ~LS2_EXIT_TIME_MASK;
7481                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7482                                 data |= LS2_EXIT_TIME(5);
7483                         if (orig != data)
7484                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7485
7486                         if (!disable_clkreq &&
7487                             !pci_is_root_bus(rdev->pdev->bus)) {
7488                                 struct pci_dev *root = rdev->pdev->bus->self;
7489                                 u32 lnkcap;
7490
7491                                 clk_req_support = false;
7492                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7493                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7494                                         clk_req_support = true;
7495                         } else {
7496                                 clk_req_support = false;
7497                         }
7498
7499                         if (clk_req_support) {
7500                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7501                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7502                                 if (orig != data)
7503                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7504
7505                                 orig = data = RREG32(THM_CLK_CNTL);
7506                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7507                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7508                                 if (orig != data)
7509                                         WREG32(THM_CLK_CNTL, data);
7510
7511                                 orig = data = RREG32(MISC_CLK_CNTL);
7512                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7513                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7514                                 if (orig != data)
7515                                         WREG32(MISC_CLK_CNTL, data);
7516
7517                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7518                                 data &= ~BCLK_AS_XCLK;
7519                                 if (orig != data)
7520                                         WREG32(CG_CLKPIN_CNTL, data);
7521
7522                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7523                                 data &= ~FORCE_BIF_REFCLK_EN;
7524                                 if (orig != data)
7525                                         WREG32(CG_CLKPIN_CNTL_2, data);
7526
7527                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7528                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7529                                 data |= MPLL_CLKOUT_SEL(4);
7530                                 if (orig != data)
7531                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7532
7533                                 orig = data = RREG32(SPLL_CNTL_MODE);
7534                                 data &= ~SPLL_REFCLK_SEL_MASK;
7535                                 if (orig != data)
7536                                         WREG32(SPLL_CNTL_MODE, data);
7537                         }
7538                 }
7539         } else {
7540                 if (orig != data)
7541                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7542         }
7543
7544         orig = data = RREG32_PCIE(PCIE_CNTL2);
7545         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7546         if (orig != data)
7547                 WREG32_PCIE(PCIE_CNTL2, data);
7548
7549         if (!disable_l0s) {
7550                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7551                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7552                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7553                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7554                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7555                                 data &= ~LC_L0S_INACTIVITY_MASK;
7556                                 if (orig != data)
7557                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7558                         }
7559                 }
7560         }
7561 }