drivers/gpu/drm/radeon/si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36
37
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
82                                          bool enable);
83 static void si_fini_pg(struct radeon_device *rdev);
84 static void si_fini_cg(struct radeon_device *rdev);
85 static void si_rlc_stop(struct radeon_device *rdev);
86
87 static const u32 verde_rlc_save_restore_register_list[] =
88 {
89         (0x8000 << 16) | (0x98f4 >> 2),
90         0x00000000,
91         (0x8040 << 16) | (0x98f4 >> 2),
92         0x00000000,
93         (0x8000 << 16) | (0xe80 >> 2),
94         0x00000000,
95         (0x8040 << 16) | (0xe80 >> 2),
96         0x00000000,
97         (0x8000 << 16) | (0x89bc >> 2),
98         0x00000000,
99         (0x8040 << 16) | (0x89bc >> 2),
100         0x00000000,
101         (0x8000 << 16) | (0x8c1c >> 2),
102         0x00000000,
103         (0x8040 << 16) | (0x8c1c >> 2),
104         0x00000000,
105         (0x9c00 << 16) | (0x98f0 >> 2),
106         0x00000000,
107         (0x9c00 << 16) | (0xe7c >> 2),
108         0x00000000,
109         (0x8000 << 16) | (0x9148 >> 2),
110         0x00000000,
111         (0x8040 << 16) | (0x9148 >> 2),
112         0x00000000,
113         (0x9c00 << 16) | (0x9150 >> 2),
114         0x00000000,
115         (0x9c00 << 16) | (0x897c >> 2),
116         0x00000000,
117         (0x9c00 << 16) | (0x8d8c >> 2),
118         0x00000000,
119         (0x9c00 << 16) | (0xac54 >> 2),
120         0x00000000,
121         0x3,
122         (0x9c00 << 16) | (0x98f8 >> 2),
123         0x00000000,
124         (0x9c00 << 16) | (0x9910 >> 2),
125         0x00000000,
126         (0x9c00 << 16) | (0x9914 >> 2),
127         0x00000000,
128         (0x9c00 << 16) | (0x9918 >> 2),
129         0x00000000,
130         (0x9c00 << 16) | (0x991c >> 2),
131         0x00000000,
132         (0x9c00 << 16) | (0x9920 >> 2),
133         0x00000000,
134         (0x9c00 << 16) | (0x9924 >> 2),
135         0x00000000,
136         (0x9c00 << 16) | (0x9928 >> 2),
137         0x00000000,
138         (0x9c00 << 16) | (0x992c >> 2),
139         0x00000000,
140         (0x9c00 << 16) | (0x9930 >> 2),
141         0x00000000,
142         (0x9c00 << 16) | (0x9934 >> 2),
143         0x00000000,
144         (0x9c00 << 16) | (0x9938 >> 2),
145         0x00000000,
146         (0x9c00 << 16) | (0x993c >> 2),
147         0x00000000,
148         (0x9c00 << 16) | (0x9940 >> 2),
149         0x00000000,
150         (0x9c00 << 16) | (0x9944 >> 2),
151         0x00000000,
152         (0x9c00 << 16) | (0x9948 >> 2),
153         0x00000000,
154         (0x9c00 << 16) | (0x994c >> 2),
155         0x00000000,
156         (0x9c00 << 16) | (0x9950 >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x9954 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0x9958 >> 2),
161         0x00000000,
162         (0x9c00 << 16) | (0x995c >> 2),
163         0x00000000,
164         (0x9c00 << 16) | (0x9960 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9964 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x9968 >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x996c >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0x9970 >> 2),
173         0x00000000,
174         (0x9c00 << 16) | (0x9974 >> 2),
175         0x00000000,
176         (0x9c00 << 16) | (0x9978 >> 2),
177         0x00000000,
178         (0x9c00 << 16) | (0x997c >> 2),
179         0x00000000,
180         (0x9c00 << 16) | (0x9980 >> 2),
181         0x00000000,
182         (0x9c00 << 16) | (0x9984 >> 2),
183         0x00000000,
184         (0x9c00 << 16) | (0x9988 >> 2),
185         0x00000000,
186         (0x9c00 << 16) | (0x998c >> 2),
187         0x00000000,
188         (0x9c00 << 16) | (0x8c00 >> 2),
189         0x00000000,
190         (0x9c00 << 16) | (0x8c14 >> 2),
191         0x00000000,
192         (0x9c00 << 16) | (0x8c04 >> 2),
193         0x00000000,
194         (0x9c00 << 16) | (0x8c08 >> 2),
195         0x00000000,
196         (0x8000 << 16) | (0x9b7c >> 2),
197         0x00000000,
198         (0x8040 << 16) | (0x9b7c >> 2),
199         0x00000000,
200         (0x8000 << 16) | (0xe84 >> 2),
201         0x00000000,
202         (0x8040 << 16) | (0xe84 >> 2),
203         0x00000000,
204         (0x8000 << 16) | (0x89c0 >> 2),
205         0x00000000,
206         (0x8040 << 16) | (0x89c0 >> 2),
207         0x00000000,
208         (0x8000 << 16) | (0x914c >> 2),
209         0x00000000,
210         (0x8040 << 16) | (0x914c >> 2),
211         0x00000000,
212         (0x8000 << 16) | (0x8c20 >> 2),
213         0x00000000,
214         (0x8040 << 16) | (0x8c20 >> 2),
215         0x00000000,
216         (0x8000 << 16) | (0x9354 >> 2),
217         0x00000000,
218         (0x8040 << 16) | (0x9354 >> 2),
219         0x00000000,
220         (0x9c00 << 16) | (0x9060 >> 2),
221         0x00000000,
222         (0x9c00 << 16) | (0x9364 >> 2),
223         0x00000000,
224         (0x9c00 << 16) | (0x9100 >> 2),
225         0x00000000,
226         (0x9c00 << 16) | (0x913c >> 2),
227         0x00000000,
228         (0x8000 << 16) | (0x90e0 >> 2),
229         0x00000000,
230         (0x8000 << 16) | (0x90e4 >> 2),
231         0x00000000,
232         (0x8000 << 16) | (0x90e8 >> 2),
233         0x00000000,
234         (0x8040 << 16) | (0x90e0 >> 2),
235         0x00000000,
236         (0x8040 << 16) | (0x90e4 >> 2),
237         0x00000000,
238         (0x8040 << 16) | (0x90e8 >> 2),
239         0x00000000,
240         (0x9c00 << 16) | (0x8bcc >> 2),
241         0x00000000,
242         (0x9c00 << 16) | (0x8b24 >> 2),
243         0x00000000,
244         (0x9c00 << 16) | (0x88c4 >> 2),
245         0x00000000,
246         (0x9c00 << 16) | (0x8e50 >> 2),
247         0x00000000,
248         (0x9c00 << 16) | (0x8c0c >> 2),
249         0x00000000,
250         (0x9c00 << 16) | (0x8e58 >> 2),
251         0x00000000,
252         (0x9c00 << 16) | (0x8e5c >> 2),
253         0x00000000,
254         (0x9c00 << 16) | (0x9508 >> 2),
255         0x00000000,
256         (0x9c00 << 16) | (0x950c >> 2),
257         0x00000000,
258         (0x9c00 << 16) | (0x9494 >> 2),
259         0x00000000,
260         (0x9c00 << 16) | (0xac0c >> 2),
261         0x00000000,
262         (0x9c00 << 16) | (0xac10 >> 2),
263         0x00000000,
264         (0x9c00 << 16) | (0xac14 >> 2),
265         0x00000000,
266         (0x9c00 << 16) | (0xae00 >> 2),
267         0x00000000,
268         (0x9c00 << 16) | (0xac08 >> 2),
269         0x00000000,
270         (0x9c00 << 16) | (0x88d4 >> 2),
271         0x00000000,
272         (0x9c00 << 16) | (0x88c8 >> 2),
273         0x00000000,
274         (0x9c00 << 16) | (0x88cc >> 2),
275         0x00000000,
276         (0x9c00 << 16) | (0x89b0 >> 2),
277         0x00000000,
278         (0x9c00 << 16) | (0x8b10 >> 2),
279         0x00000000,
280         (0x9c00 << 16) | (0x8a14 >> 2),
281         0x00000000,
282         (0x9c00 << 16) | (0x9830 >> 2),
283         0x00000000,
284         (0x9c00 << 16) | (0x9834 >> 2),
285         0x00000000,
286         (0x9c00 << 16) | (0x9838 >> 2),
287         0x00000000,
288         (0x9c00 << 16) | (0x9a10 >> 2),
289         0x00000000,
290         (0x8000 << 16) | (0x9870 >> 2),
291         0x00000000,
292         (0x8000 << 16) | (0x9874 >> 2),
293         0x00000000,
294         (0x8001 << 16) | (0x9870 >> 2),
295         0x00000000,
296         (0x8001 << 16) | (0x9874 >> 2),
297         0x00000000,
298         (0x8040 << 16) | (0x9870 >> 2),
299         0x00000000,
300         (0x8040 << 16) | (0x9874 >> 2),
301         0x00000000,
302         (0x8041 << 16) | (0x9870 >> 2),
303         0x00000000,
304         (0x8041 << 16) | (0x9874 >> 2),
305         0x00000000,
306         0x00000000
307 };
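
/*
 * The golden register tables below are triples of
 * { register offset, AND mask, OR value }.  They are consumed by
 * radeon_program_register_sequence(), which (roughly) reads each
 * register, clears the AND-mask bits and ORs in the new value, or
 * writes the value directly when the mask is 0xffffffff.
 */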
308
309 static const u32 tahiti_golden_rlc_registers[] =
310 {
311         0xc424, 0xffffffff, 0x00601005,
312         0xc47c, 0xffffffff, 0x10104040,
313         0xc488, 0xffffffff, 0x0100000a,
314         0xc314, 0xffffffff, 0x00000800,
315         0xc30c, 0xffffffff, 0x800000f4,
316         0xf4a8, 0xffffffff, 0x00000000
317 };
318
319 static const u32 tahiti_golden_registers[] =
320 {
321         0x9a10, 0x00010000, 0x00018208,
322         0x9830, 0xffffffff, 0x00000000,
323         0x9834, 0xf00fffff, 0x00000400,
324         0x9838, 0x0002021c, 0x00020200,
325         0xc78, 0x00000080, 0x00000000,
326         0xd030, 0x000300c0, 0x00800040,
327         0xd830, 0x000300c0, 0x00800040,
328         0x5bb0, 0x000000f0, 0x00000070,
329         0x5bc0, 0x00200000, 0x50100000,
330         0x7030, 0x31000311, 0x00000011,
331         0x277c, 0x00000003, 0x000007ff,
332         0x240c, 0x000007ff, 0x00000000,
333         0x8a14, 0xf000001f, 0x00000007,
334         0x8b24, 0xffffffff, 0x00ffffff,
335         0x8b10, 0x0000ff0f, 0x00000000,
336         0x28a4c, 0x07ffffff, 0x4e000000,
337         0x28350, 0x3f3f3fff, 0x2a00126a,
338         0x30, 0x000000ff, 0x0040,
339         0x34, 0x00000040, 0x00004040,
340         0x9100, 0x07ffffff, 0x03000000,
341         0x8e88, 0x01ff1f3f, 0x00000000,
342         0x8e84, 0x01ff1f3f, 0x00000000,
343         0x9060, 0x0000007f, 0x00000020,
344         0x9508, 0x00010000, 0x00010000,
345         0xac14, 0x00000200, 0x000002fb,
346         0xac10, 0xffffffff, 0x0000543b,
347         0xac0c, 0xffffffff, 0xa9210876,
348         0x88d0, 0xffffffff, 0x000fff40,
349         0x88d4, 0x0000001f, 0x00000010,
350         0x1410, 0x20000000, 0x20fffed8,
351         0x15c0, 0x000c0fc0, 0x000c0400
352 };
353
354 static const u32 tahiti_golden_registers2[] =
355 {
356         0xc64, 0x00000001, 0x00000001
357 };
358
359 static const u32 pitcairn_golden_rlc_registers[] =
360 {
361         0xc424, 0xffffffff, 0x00601004,
362         0xc47c, 0xffffffff, 0x10102020,
363         0xc488, 0xffffffff, 0x01000020,
364         0xc314, 0xffffffff, 0x00000800,
365         0xc30c, 0xffffffff, 0x800000a4
366 };
367
368 static const u32 pitcairn_golden_registers[] =
369 {
370         0x9a10, 0x00010000, 0x00018208,
371         0x9830, 0xffffffff, 0x00000000,
372         0x9834, 0xf00fffff, 0x00000400,
373         0x9838, 0x0002021c, 0x00020200,
374         0xc78, 0x00000080, 0x00000000,
375         0xd030, 0x000300c0, 0x00800040,
376         0xd830, 0x000300c0, 0x00800040,
377         0x5bb0, 0x000000f0, 0x00000070,
378         0x5bc0, 0x00200000, 0x50100000,
379         0x7030, 0x31000311, 0x00000011,
380         0x2ae4, 0x00073ffe, 0x000022a2,
381         0x240c, 0x000007ff, 0x00000000,
382         0x8a14, 0xf000001f, 0x00000007,
383         0x8b24, 0xffffffff, 0x00ffffff,
384         0x8b10, 0x0000ff0f, 0x00000000,
385         0x28a4c, 0x07ffffff, 0x4e000000,
386         0x28350, 0x3f3f3fff, 0x2a00126a,
387         0x30, 0x000000ff, 0x0040,
388         0x34, 0x00000040, 0x00004040,
389         0x9100, 0x07ffffff, 0x03000000,
390         0x9060, 0x0000007f, 0x00000020,
391         0x9508, 0x00010000, 0x00010000,
392         0xac14, 0x000003ff, 0x000000f7,
393         0xac10, 0xffffffff, 0x00000000,
394         0xac0c, 0xffffffff, 0x32761054,
395         0x88d4, 0x0000001f, 0x00000010,
396         0x15c0, 0x000c0fc0, 0x000c0400
397 };
398
399 static const u32 verde_golden_rlc_registers[] =
400 {
401         0xc424, 0xffffffff, 0x033f1005,
402         0xc47c, 0xffffffff, 0x10808020,
403         0xc488, 0xffffffff, 0x00800008,
404         0xc314, 0xffffffff, 0x00001000,
405         0xc30c, 0xffffffff, 0x80010014
406 };
407
408 static const u32 verde_golden_registers[] =
409 {
410         0x9a10, 0x00010000, 0x00018208,
411         0x9830, 0xffffffff, 0x00000000,
412         0x9834, 0xf00fffff, 0x00000400,
413         0x9838, 0x0002021c, 0x00020200,
414         0xc78, 0x00000080, 0x00000000,
415         0xd030, 0x000300c0, 0x00800040,
416         0xd030, 0x000300c0, 0x00800040,
417         0xd830, 0x000300c0, 0x00800040,
418         0xd830, 0x000300c0, 0x00800040,
419         0x5bb0, 0x000000f0, 0x00000070,
420         0x5bc0, 0x00200000, 0x50100000,
421         0x7030, 0x31000311, 0x00000011,
422         0x2ae4, 0x00073ffe, 0x000022a2,
423         0x2ae4, 0x00073ffe, 0x000022a2,
424         0x2ae4, 0x00073ffe, 0x000022a2,
425         0x240c, 0x000007ff, 0x00000000,
426         0x240c, 0x000007ff, 0x00000000,
427         0x240c, 0x000007ff, 0x00000000,
428         0x8a14, 0xf000001f, 0x00000007,
429         0x8a14, 0xf000001f, 0x00000007,
430         0x8a14, 0xf000001f, 0x00000007,
431         0x8b24, 0xffffffff, 0x00ffffff,
432         0x8b10, 0x0000ff0f, 0x00000000,
433         0x28a4c, 0x07ffffff, 0x4e000000,
434         0x28350, 0x3f3f3fff, 0x0000124a,
435         0x28350, 0x3f3f3fff, 0x0000124a,
436         0x28350, 0x3f3f3fff, 0x0000124a,
437         0x30, 0x000000ff, 0x0040,
438         0x34, 0x00000040, 0x00004040,
439         0x9100, 0x07ffffff, 0x03000000,
440         0x9100, 0x07ffffff, 0x03000000,
441         0x8e88, 0x01ff1f3f, 0x00000000,
442         0x8e88, 0x01ff1f3f, 0x00000000,
443         0x8e88, 0x01ff1f3f, 0x00000000,
444         0x8e84, 0x01ff1f3f, 0x00000000,
445         0x8e84, 0x01ff1f3f, 0x00000000,
446         0x8e84, 0x01ff1f3f, 0x00000000,
447         0x9060, 0x0000007f, 0x00000020,
448         0x9508, 0x00010000, 0x00010000,
449         0xac14, 0x000003ff, 0x00000003,
450         0xac14, 0x000003ff, 0x00000003,
451         0xac14, 0x000003ff, 0x00000003,
452         0xac10, 0xffffffff, 0x00000000,
453         0xac10, 0xffffffff, 0x00000000,
454         0xac10, 0xffffffff, 0x00000000,
455         0xac0c, 0xffffffff, 0x00001032,
456         0xac0c, 0xffffffff, 0x00001032,
457         0xac0c, 0xffffffff, 0x00001032,
458         0x88d4, 0x0000001f, 0x00000010,
459         0x88d4, 0x0000001f, 0x00000010,
460         0x88d4, 0x0000001f, 0x00000010,
461         0x15c0, 0x000c0fc0, 0x000c0400
462 };
463
464 static const u32 oland_golden_rlc_registers[] =
465 {
466         0xc424, 0xffffffff, 0x00601005,
467         0xc47c, 0xffffffff, 0x10104040,
468         0xc488, 0xffffffff, 0x0100000a,
469         0xc314, 0xffffffff, 0x00000800,
470         0xc30c, 0xffffffff, 0x800000f4
471 };
472
473 static const u32 oland_golden_registers[] =
474 {
475         0x9a10, 0x00010000, 0x00018208,
476         0x9830, 0xffffffff, 0x00000000,
477         0x9834, 0xf00fffff, 0x00000400,
478         0x9838, 0x0002021c, 0x00020200,
479         0xc78, 0x00000080, 0x00000000,
480         0xd030, 0x000300c0, 0x00800040,
481         0xd830, 0x000300c0, 0x00800040,
482         0x5bb0, 0x000000f0, 0x00000070,
483         0x5bc0, 0x00200000, 0x50100000,
484         0x7030, 0x31000311, 0x00000011,
485         0x2ae4, 0x00073ffe, 0x000022a2,
486         0x240c, 0x000007ff, 0x00000000,
487         0x8a14, 0xf000001f, 0x00000007,
488         0x8b24, 0xffffffff, 0x00ffffff,
489         0x8b10, 0x0000ff0f, 0x00000000,
490         0x28a4c, 0x07ffffff, 0x4e000000,
491         0x28350, 0x3f3f3fff, 0x00000082,
492         0x30, 0x000000ff, 0x0040,
493         0x34, 0x00000040, 0x00004040,
494         0x9100, 0x07ffffff, 0x03000000,
495         0x9060, 0x0000007f, 0x00000020,
496         0x9508, 0x00010000, 0x00010000,
497         0xac14, 0x000003ff, 0x000000f3,
498         0xac10, 0xffffffff, 0x00000000,
499         0xac0c, 0xffffffff, 0x00003210,
500         0x88d4, 0x0000001f, 0x00000010,
501         0x15c0, 0x000c0fc0, 0x000c0400
502 };
503
504 static const u32 hainan_golden_registers[] =
505 {
506         0x9a10, 0x00010000, 0x00018208,
507         0x9830, 0xffffffff, 0x00000000,
508         0x9834, 0xf00fffff, 0x00000400,
509         0x9838, 0x0002021c, 0x00020200,
510         0xd0c0, 0xff000fff, 0x00000100,
511         0xd030, 0x000300c0, 0x00800040,
512         0xd8c0, 0xff000fff, 0x00000100,
513         0xd830, 0x000300c0, 0x00800040,
514         0x2ae4, 0x00073ffe, 0x000022a2,
515         0x240c, 0x000007ff, 0x00000000,
516         0x8a14, 0xf000001f, 0x00000007,
517         0x8b24, 0xffffffff, 0x00ffffff,
518         0x8b10, 0x0000ff0f, 0x00000000,
519         0x28a4c, 0x07ffffff, 0x4e000000,
520         0x28350, 0x3f3f3fff, 0x00000000,
521         0x30, 0x000000ff, 0x0040,
522         0x34, 0x00000040, 0x00004040,
523         0x9100, 0x03e00000, 0x03600000,
524         0x9060, 0x0000007f, 0x00000020,
525         0x9508, 0x00010000, 0x00010000,
526         0xac14, 0x000003ff, 0x000000f1,
527         0xac10, 0xffffffff, 0x00000000,
528         0xac0c, 0xffffffff, 0x00003210,
529         0x88d4, 0x0000001f, 0x00000010,
530         0x15c0, 0x000c0fc0, 0x000c0400
531 };
532
533 static const u32 hainan_golden_registers2[] =
534 {
535         0x98f8, 0xffffffff, 0x02010001
536 };
537
538 static const u32 tahiti_mgcg_cgcg_init[] =
539 {
540         0xc400, 0xffffffff, 0xfffffffc,
541         0x802c, 0xffffffff, 0xe0000000,
542         0x9a60, 0xffffffff, 0x00000100,
543         0x92a4, 0xffffffff, 0x00000100,
544         0xc164, 0xffffffff, 0x00000100,
545         0x9774, 0xffffffff, 0x00000100,
546         0x8984, 0xffffffff, 0x06000100,
547         0x8a18, 0xffffffff, 0x00000100,
548         0x92a0, 0xffffffff, 0x00000100,
549         0xc380, 0xffffffff, 0x00000100,
550         0x8b28, 0xffffffff, 0x00000100,
551         0x9144, 0xffffffff, 0x00000100,
552         0x8d88, 0xffffffff, 0x00000100,
553         0x8d8c, 0xffffffff, 0x00000100,
554         0x9030, 0xffffffff, 0x00000100,
555         0x9034, 0xffffffff, 0x00000100,
556         0x9038, 0xffffffff, 0x00000100,
557         0x903c, 0xffffffff, 0x00000100,
558         0xad80, 0xffffffff, 0x00000100,
559         0xac54, 0xffffffff, 0x00000100,
560         0x897c, 0xffffffff, 0x06000100,
561         0x9868, 0xffffffff, 0x00000100,
562         0x9510, 0xffffffff, 0x00000100,
563         0xaf04, 0xffffffff, 0x00000100,
564         0xae04, 0xffffffff, 0x00000100,
565         0x949c, 0xffffffff, 0x00000100,
566         0x802c, 0xffffffff, 0xe0000000,
567         0x9160, 0xffffffff, 0x00010000,
568         0x9164, 0xffffffff, 0x00030002,
569         0x9168, 0xffffffff, 0x00040007,
570         0x916c, 0xffffffff, 0x00060005,
571         0x9170, 0xffffffff, 0x00090008,
572         0x9174, 0xffffffff, 0x00020001,
573         0x9178, 0xffffffff, 0x00040003,
574         0x917c, 0xffffffff, 0x00000007,
575         0x9180, 0xffffffff, 0x00060005,
576         0x9184, 0xffffffff, 0x00090008,
577         0x9188, 0xffffffff, 0x00030002,
578         0x918c, 0xffffffff, 0x00050004,
579         0x9190, 0xffffffff, 0x00000008,
580         0x9194, 0xffffffff, 0x00070006,
581         0x9198, 0xffffffff, 0x000a0009,
582         0x919c, 0xffffffff, 0x00040003,
583         0x91a0, 0xffffffff, 0x00060005,
584         0x91a4, 0xffffffff, 0x00000009,
585         0x91a8, 0xffffffff, 0x00080007,
586         0x91ac, 0xffffffff, 0x000b000a,
587         0x91b0, 0xffffffff, 0x00050004,
588         0x91b4, 0xffffffff, 0x00070006,
589         0x91b8, 0xffffffff, 0x0008000b,
590         0x91bc, 0xffffffff, 0x000a0009,
591         0x91c0, 0xffffffff, 0x000d000c,
592         0x91c4, 0xffffffff, 0x00060005,
593         0x91c8, 0xffffffff, 0x00080007,
594         0x91cc, 0xffffffff, 0x0000000b,
595         0x91d0, 0xffffffff, 0x000a0009,
596         0x91d4, 0xffffffff, 0x000d000c,
597         0x91d8, 0xffffffff, 0x00070006,
598         0x91dc, 0xffffffff, 0x00090008,
599         0x91e0, 0xffffffff, 0x0000000c,
600         0x91e4, 0xffffffff, 0x000b000a,
601         0x91e8, 0xffffffff, 0x000e000d,
602         0x91ec, 0xffffffff, 0x00080007,
603         0x91f0, 0xffffffff, 0x000a0009,
604         0x91f4, 0xffffffff, 0x0000000d,
605         0x91f8, 0xffffffff, 0x000c000b,
606         0x91fc, 0xffffffff, 0x000f000e,
607         0x9200, 0xffffffff, 0x00090008,
608         0x9204, 0xffffffff, 0x000b000a,
609         0x9208, 0xffffffff, 0x000c000f,
610         0x920c, 0xffffffff, 0x000e000d,
611         0x9210, 0xffffffff, 0x00110010,
612         0x9214, 0xffffffff, 0x000a0009,
613         0x9218, 0xffffffff, 0x000c000b,
614         0x921c, 0xffffffff, 0x0000000f,
615         0x9220, 0xffffffff, 0x000e000d,
616         0x9224, 0xffffffff, 0x00110010,
617         0x9228, 0xffffffff, 0x000b000a,
618         0x922c, 0xffffffff, 0x000d000c,
619         0x9230, 0xffffffff, 0x00000010,
620         0x9234, 0xffffffff, 0x000f000e,
621         0x9238, 0xffffffff, 0x00120011,
622         0x923c, 0xffffffff, 0x000c000b,
623         0x9240, 0xffffffff, 0x000e000d,
624         0x9244, 0xffffffff, 0x00000011,
625         0x9248, 0xffffffff, 0x0010000f,
626         0x924c, 0xffffffff, 0x00130012,
627         0x9250, 0xffffffff, 0x000d000c,
628         0x9254, 0xffffffff, 0x000f000e,
629         0x9258, 0xffffffff, 0x00100013,
630         0x925c, 0xffffffff, 0x00120011,
631         0x9260, 0xffffffff, 0x00150014,
632         0x9264, 0xffffffff, 0x000e000d,
633         0x9268, 0xffffffff, 0x0010000f,
634         0x926c, 0xffffffff, 0x00000013,
635         0x9270, 0xffffffff, 0x00120011,
636         0x9274, 0xffffffff, 0x00150014,
637         0x9278, 0xffffffff, 0x000f000e,
638         0x927c, 0xffffffff, 0x00110010,
639         0x9280, 0xffffffff, 0x00000014,
640         0x9284, 0xffffffff, 0x00130012,
641         0x9288, 0xffffffff, 0x00160015,
642         0x928c, 0xffffffff, 0x0010000f,
643         0x9290, 0xffffffff, 0x00120011,
644         0x9294, 0xffffffff, 0x00000015,
645         0x9298, 0xffffffff, 0x00140013,
646         0x929c, 0xffffffff, 0x00170016,
647         0x9150, 0xffffffff, 0x96940200,
648         0x8708, 0xffffffff, 0x00900100,
649         0xc478, 0xffffffff, 0x00000080,
650         0xc404, 0xffffffff, 0x0020003f,
651         0x30, 0xffffffff, 0x0000001c,
652         0x34, 0x000f0000, 0x000f0000,
653         0x160c, 0xffffffff, 0x00000100,
654         0x1024, 0xffffffff, 0x00000100,
655         0x102c, 0x00000101, 0x00000000,
656         0x20a8, 0xffffffff, 0x00000104,
657         0x264c, 0x000c0000, 0x000c0000,
658         0x2648, 0x000c0000, 0x000c0000,
659         0x55e4, 0xff000fff, 0x00000100,
660         0x55e8, 0x00000001, 0x00000001,
661         0x2f50, 0x00000001, 0x00000001,
662         0x30cc, 0xc0000fff, 0x00000104,
663         0xc1e4, 0x00000001, 0x00000001,
664         0xd0c0, 0xfffffff0, 0x00000100,
665         0xd8c0, 0xfffffff0, 0x00000100
666 };
667
668 static const u32 pitcairn_mgcg_cgcg_init[] =
669 {
670         0xc400, 0xffffffff, 0xfffffffc,
671         0x802c, 0xffffffff, 0xe0000000,
672         0x9a60, 0xffffffff, 0x00000100,
673         0x92a4, 0xffffffff, 0x00000100,
674         0xc164, 0xffffffff, 0x00000100,
675         0x9774, 0xffffffff, 0x00000100,
676         0x8984, 0xffffffff, 0x06000100,
677         0x8a18, 0xffffffff, 0x00000100,
678         0x92a0, 0xffffffff, 0x00000100,
679         0xc380, 0xffffffff, 0x00000100,
680         0x8b28, 0xffffffff, 0x00000100,
681         0x9144, 0xffffffff, 0x00000100,
682         0x8d88, 0xffffffff, 0x00000100,
683         0x8d8c, 0xffffffff, 0x00000100,
684         0x9030, 0xffffffff, 0x00000100,
685         0x9034, 0xffffffff, 0x00000100,
686         0x9038, 0xffffffff, 0x00000100,
687         0x903c, 0xffffffff, 0x00000100,
688         0xad80, 0xffffffff, 0x00000100,
689         0xac54, 0xffffffff, 0x00000100,
690         0x897c, 0xffffffff, 0x06000100,
691         0x9868, 0xffffffff, 0x00000100,
692         0x9510, 0xffffffff, 0x00000100,
693         0xaf04, 0xffffffff, 0x00000100,
694         0xae04, 0xffffffff, 0x00000100,
695         0x949c, 0xffffffff, 0x00000100,
696         0x802c, 0xffffffff, 0xe0000000,
697         0x9160, 0xffffffff, 0x00010000,
698         0x9164, 0xffffffff, 0x00030002,
699         0x9168, 0xffffffff, 0x00040007,
700         0x916c, 0xffffffff, 0x00060005,
701         0x9170, 0xffffffff, 0x00090008,
702         0x9174, 0xffffffff, 0x00020001,
703         0x9178, 0xffffffff, 0x00040003,
704         0x917c, 0xffffffff, 0x00000007,
705         0x9180, 0xffffffff, 0x00060005,
706         0x9184, 0xffffffff, 0x00090008,
707         0x9188, 0xffffffff, 0x00030002,
708         0x918c, 0xffffffff, 0x00050004,
709         0x9190, 0xffffffff, 0x00000008,
710         0x9194, 0xffffffff, 0x00070006,
711         0x9198, 0xffffffff, 0x000a0009,
712         0x919c, 0xffffffff, 0x00040003,
713         0x91a0, 0xffffffff, 0x00060005,
714         0x91a4, 0xffffffff, 0x00000009,
715         0x91a8, 0xffffffff, 0x00080007,
716         0x91ac, 0xffffffff, 0x000b000a,
717         0x91b0, 0xffffffff, 0x00050004,
718         0x91b4, 0xffffffff, 0x00070006,
719         0x91b8, 0xffffffff, 0x0008000b,
720         0x91bc, 0xffffffff, 0x000a0009,
721         0x91c0, 0xffffffff, 0x000d000c,
722         0x9200, 0xffffffff, 0x00090008,
723         0x9204, 0xffffffff, 0x000b000a,
724         0x9208, 0xffffffff, 0x000c000f,
725         0x920c, 0xffffffff, 0x000e000d,
726         0x9210, 0xffffffff, 0x00110010,
727         0x9214, 0xffffffff, 0x000a0009,
728         0x9218, 0xffffffff, 0x000c000b,
729         0x921c, 0xffffffff, 0x0000000f,
730         0x9220, 0xffffffff, 0x000e000d,
731         0x9224, 0xffffffff, 0x00110010,
732         0x9228, 0xffffffff, 0x000b000a,
733         0x922c, 0xffffffff, 0x000d000c,
734         0x9230, 0xffffffff, 0x00000010,
735         0x9234, 0xffffffff, 0x000f000e,
736         0x9238, 0xffffffff, 0x00120011,
737         0x923c, 0xffffffff, 0x000c000b,
738         0x9240, 0xffffffff, 0x000e000d,
739         0x9244, 0xffffffff, 0x00000011,
740         0x9248, 0xffffffff, 0x0010000f,
741         0x924c, 0xffffffff, 0x00130012,
742         0x9250, 0xffffffff, 0x000d000c,
743         0x9254, 0xffffffff, 0x000f000e,
744         0x9258, 0xffffffff, 0x00100013,
745         0x925c, 0xffffffff, 0x00120011,
746         0x9260, 0xffffffff, 0x00150014,
747         0x9150, 0xffffffff, 0x96940200,
748         0x8708, 0xffffffff, 0x00900100,
749         0xc478, 0xffffffff, 0x00000080,
750         0xc404, 0xffffffff, 0x0020003f,
751         0x30, 0xffffffff, 0x0000001c,
752         0x34, 0x000f0000, 0x000f0000,
753         0x160c, 0xffffffff, 0x00000100,
754         0x1024, 0xffffffff, 0x00000100,
755         0x102c, 0x00000101, 0x00000000,
756         0x20a8, 0xffffffff, 0x00000104,
757         0x55e4, 0xff000fff, 0x00000100,
758         0x55e8, 0x00000001, 0x00000001,
759         0x2f50, 0x00000001, 0x00000001,
760         0x30cc, 0xc0000fff, 0x00000104,
761         0xc1e4, 0x00000001, 0x00000001,
762         0xd0c0, 0xfffffff0, 0x00000100,
763         0xd8c0, 0xfffffff0, 0x00000100
764 };
765
766 static const u32 verde_mgcg_cgcg_init[] =
767 {
768         0xc400, 0xffffffff, 0xfffffffc,
769         0x802c, 0xffffffff, 0xe0000000,
770         0x9a60, 0xffffffff, 0x00000100,
771         0x92a4, 0xffffffff, 0x00000100,
772         0xc164, 0xffffffff, 0x00000100,
773         0x9774, 0xffffffff, 0x00000100,
774         0x8984, 0xffffffff, 0x06000100,
775         0x8a18, 0xffffffff, 0x00000100,
776         0x92a0, 0xffffffff, 0x00000100,
777         0xc380, 0xffffffff, 0x00000100,
778         0x8b28, 0xffffffff, 0x00000100,
779         0x9144, 0xffffffff, 0x00000100,
780         0x8d88, 0xffffffff, 0x00000100,
781         0x8d8c, 0xffffffff, 0x00000100,
782         0x9030, 0xffffffff, 0x00000100,
783         0x9034, 0xffffffff, 0x00000100,
784         0x9038, 0xffffffff, 0x00000100,
785         0x903c, 0xffffffff, 0x00000100,
786         0xad80, 0xffffffff, 0x00000100,
787         0xac54, 0xffffffff, 0x00000100,
788         0x897c, 0xffffffff, 0x06000100,
789         0x9868, 0xffffffff, 0x00000100,
790         0x9510, 0xffffffff, 0x00000100,
791         0xaf04, 0xffffffff, 0x00000100,
792         0xae04, 0xffffffff, 0x00000100,
793         0x949c, 0xffffffff, 0x00000100,
794         0x802c, 0xffffffff, 0xe0000000,
795         0x9160, 0xffffffff, 0x00010000,
796         0x9164, 0xffffffff, 0x00030002,
797         0x9168, 0xffffffff, 0x00040007,
798         0x916c, 0xffffffff, 0x00060005,
799         0x9170, 0xffffffff, 0x00090008,
800         0x9174, 0xffffffff, 0x00020001,
801         0x9178, 0xffffffff, 0x00040003,
802         0x917c, 0xffffffff, 0x00000007,
803         0x9180, 0xffffffff, 0x00060005,
804         0x9184, 0xffffffff, 0x00090008,
805         0x9188, 0xffffffff, 0x00030002,
806         0x918c, 0xffffffff, 0x00050004,
807         0x9190, 0xffffffff, 0x00000008,
808         0x9194, 0xffffffff, 0x00070006,
809         0x9198, 0xffffffff, 0x000a0009,
810         0x919c, 0xffffffff, 0x00040003,
811         0x91a0, 0xffffffff, 0x00060005,
812         0x91a4, 0xffffffff, 0x00000009,
813         0x91a8, 0xffffffff, 0x00080007,
814         0x91ac, 0xffffffff, 0x000b000a,
815         0x91b0, 0xffffffff, 0x00050004,
816         0x91b4, 0xffffffff, 0x00070006,
817         0x91b8, 0xffffffff, 0x0008000b,
818         0x91bc, 0xffffffff, 0x000a0009,
819         0x91c0, 0xffffffff, 0x000d000c,
820         0x9200, 0xffffffff, 0x00090008,
821         0x9204, 0xffffffff, 0x000b000a,
822         0x9208, 0xffffffff, 0x000c000f,
823         0x920c, 0xffffffff, 0x000e000d,
824         0x9210, 0xffffffff, 0x00110010,
825         0x9214, 0xffffffff, 0x000a0009,
826         0x9218, 0xffffffff, 0x000c000b,
827         0x921c, 0xffffffff, 0x0000000f,
828         0x9220, 0xffffffff, 0x000e000d,
829         0x9224, 0xffffffff, 0x00110010,
830         0x9228, 0xffffffff, 0x000b000a,
831         0x922c, 0xffffffff, 0x000d000c,
832         0x9230, 0xffffffff, 0x00000010,
833         0x9234, 0xffffffff, 0x000f000e,
834         0x9238, 0xffffffff, 0x00120011,
835         0x923c, 0xffffffff, 0x000c000b,
836         0x9240, 0xffffffff, 0x000e000d,
837         0x9244, 0xffffffff, 0x00000011,
838         0x9248, 0xffffffff, 0x0010000f,
839         0x924c, 0xffffffff, 0x00130012,
840         0x9250, 0xffffffff, 0x000d000c,
841         0x9254, 0xffffffff, 0x000f000e,
842         0x9258, 0xffffffff, 0x00100013,
843         0x925c, 0xffffffff, 0x00120011,
844         0x9260, 0xffffffff, 0x00150014,
845         0x9150, 0xffffffff, 0x96940200,
846         0x8708, 0xffffffff, 0x00900100,
847         0xc478, 0xffffffff, 0x00000080,
848         0xc404, 0xffffffff, 0x0020003f,
849         0x30, 0xffffffff, 0x0000001c,
850         0x34, 0x000f0000, 0x000f0000,
851         0x160c, 0xffffffff, 0x00000100,
852         0x1024, 0xffffffff, 0x00000100,
853         0x102c, 0x00000101, 0x00000000,
854         0x20a8, 0xffffffff, 0x00000104,
855         0x264c, 0x000c0000, 0x000c0000,
856         0x2648, 0x000c0000, 0x000c0000,
857         0x55e4, 0xff000fff, 0x00000100,
858         0x55e8, 0x00000001, 0x00000001,
859         0x2f50, 0x00000001, 0x00000001,
860         0x30cc, 0xc0000fff, 0x00000104,
861         0xc1e4, 0x00000001, 0x00000001,
862         0xd0c0, 0xfffffff0, 0x00000100,
863         0xd8c0, 0xfffffff0, 0x00000100
864 };
865
866 static const u32 oland_mgcg_cgcg_init[] =
867 {
868         0xc400, 0xffffffff, 0xfffffffc,
869         0x802c, 0xffffffff, 0xe0000000,
870         0x9a60, 0xffffffff, 0x00000100,
871         0x92a4, 0xffffffff, 0x00000100,
872         0xc164, 0xffffffff, 0x00000100,
873         0x9774, 0xffffffff, 0x00000100,
874         0x8984, 0xffffffff, 0x06000100,
875         0x8a18, 0xffffffff, 0x00000100,
876         0x92a0, 0xffffffff, 0x00000100,
877         0xc380, 0xffffffff, 0x00000100,
878         0x8b28, 0xffffffff, 0x00000100,
879         0x9144, 0xffffffff, 0x00000100,
880         0x8d88, 0xffffffff, 0x00000100,
881         0x8d8c, 0xffffffff, 0x00000100,
882         0x9030, 0xffffffff, 0x00000100,
883         0x9034, 0xffffffff, 0x00000100,
884         0x9038, 0xffffffff, 0x00000100,
885         0x903c, 0xffffffff, 0x00000100,
886         0xad80, 0xffffffff, 0x00000100,
887         0xac54, 0xffffffff, 0x00000100,
888         0x897c, 0xffffffff, 0x06000100,
889         0x9868, 0xffffffff, 0x00000100,
890         0x9510, 0xffffffff, 0x00000100,
891         0xaf04, 0xffffffff, 0x00000100,
892         0xae04, 0xffffffff, 0x00000100,
893         0x949c, 0xffffffff, 0x00000100,
894         0x802c, 0xffffffff, 0xe0000000,
895         0x9160, 0xffffffff, 0x00010000,
896         0x9164, 0xffffffff, 0x00030002,
897         0x9168, 0xffffffff, 0x00040007,
898         0x916c, 0xffffffff, 0x00060005,
899         0x9170, 0xffffffff, 0x00090008,
900         0x9174, 0xffffffff, 0x00020001,
901         0x9178, 0xffffffff, 0x00040003,
902         0x917c, 0xffffffff, 0x00000007,
903         0x9180, 0xffffffff, 0x00060005,
904         0x9184, 0xffffffff, 0x00090008,
905         0x9188, 0xffffffff, 0x00030002,
906         0x918c, 0xffffffff, 0x00050004,
907         0x9190, 0xffffffff, 0x00000008,
908         0x9194, 0xffffffff, 0x00070006,
909         0x9198, 0xffffffff, 0x000a0009,
910         0x919c, 0xffffffff, 0x00040003,
911         0x91a0, 0xffffffff, 0x00060005,
912         0x91a4, 0xffffffff, 0x00000009,
913         0x91a8, 0xffffffff, 0x00080007,
914         0x91ac, 0xffffffff, 0x000b000a,
915         0x91b0, 0xffffffff, 0x00050004,
916         0x91b4, 0xffffffff, 0x00070006,
917         0x91b8, 0xffffffff, 0x0008000b,
918         0x91bc, 0xffffffff, 0x000a0009,
919         0x91c0, 0xffffffff, 0x000d000c,
920         0x91c4, 0xffffffff, 0x00060005,
921         0x91c8, 0xffffffff, 0x00080007,
922         0x91cc, 0xffffffff, 0x0000000b,
923         0x91d0, 0xffffffff, 0x000a0009,
924         0x91d4, 0xffffffff, 0x000d000c,
925         0x9150, 0xffffffff, 0x96940200,
926         0x8708, 0xffffffff, 0x00900100,
927         0xc478, 0xffffffff, 0x00000080,
928         0xc404, 0xffffffff, 0x0020003f,
929         0x30, 0xffffffff, 0x0000001c,
930         0x34, 0x000f0000, 0x000f0000,
931         0x160c, 0xffffffff, 0x00000100,
932         0x1024, 0xffffffff, 0x00000100,
933         0x102c, 0x00000101, 0x00000000,
934         0x20a8, 0xffffffff, 0x00000104,
935         0x264c, 0x000c0000, 0x000c0000,
936         0x2648, 0x000c0000, 0x000c0000,
937         0x55e4, 0xff000fff, 0x00000100,
938         0x55e8, 0x00000001, 0x00000001,
939         0x2f50, 0x00000001, 0x00000001,
940         0x30cc, 0xc0000fff, 0x00000104,
941         0xc1e4, 0x00000001, 0x00000001,
942         0xd0c0, 0xfffffff0, 0x00000100,
943         0xd8c0, 0xfffffff0, 0x00000100
944 };
945
946 static const u32 hainan_mgcg_cgcg_init[] =
947 {
948         0xc400, 0xffffffff, 0xfffffffc,
949         0x802c, 0xffffffff, 0xe0000000,
950         0x9a60, 0xffffffff, 0x00000100,
951         0x92a4, 0xffffffff, 0x00000100,
952         0xc164, 0xffffffff, 0x00000100,
953         0x9774, 0xffffffff, 0x00000100,
954         0x8984, 0xffffffff, 0x06000100,
955         0x8a18, 0xffffffff, 0x00000100,
956         0x92a0, 0xffffffff, 0x00000100,
957         0xc380, 0xffffffff, 0x00000100,
958         0x8b28, 0xffffffff, 0x00000100,
959         0x9144, 0xffffffff, 0x00000100,
960         0x8d88, 0xffffffff, 0x00000100,
961         0x8d8c, 0xffffffff, 0x00000100,
962         0x9030, 0xffffffff, 0x00000100,
963         0x9034, 0xffffffff, 0x00000100,
964         0x9038, 0xffffffff, 0x00000100,
965         0x903c, 0xffffffff, 0x00000100,
966         0xad80, 0xffffffff, 0x00000100,
967         0xac54, 0xffffffff, 0x00000100,
968         0x897c, 0xffffffff, 0x06000100,
969         0x9868, 0xffffffff, 0x00000100,
970         0x9510, 0xffffffff, 0x00000100,
971         0xaf04, 0xffffffff, 0x00000100,
972         0xae04, 0xffffffff, 0x00000100,
973         0x949c, 0xffffffff, 0x00000100,
974         0x802c, 0xffffffff, 0xe0000000,
975         0x9160, 0xffffffff, 0x00010000,
976         0x9164, 0xffffffff, 0x00030002,
977         0x9168, 0xffffffff, 0x00040007,
978         0x916c, 0xffffffff, 0x00060005,
979         0x9170, 0xffffffff, 0x00090008,
980         0x9174, 0xffffffff, 0x00020001,
981         0x9178, 0xffffffff, 0x00040003,
982         0x917c, 0xffffffff, 0x00000007,
983         0x9180, 0xffffffff, 0x00060005,
984         0x9184, 0xffffffff, 0x00090008,
985         0x9188, 0xffffffff, 0x00030002,
986         0x918c, 0xffffffff, 0x00050004,
987         0x9190, 0xffffffff, 0x00000008,
988         0x9194, 0xffffffff, 0x00070006,
989         0x9198, 0xffffffff, 0x000a0009,
990         0x919c, 0xffffffff, 0x00040003,
991         0x91a0, 0xffffffff, 0x00060005,
992         0x91a4, 0xffffffff, 0x00000009,
993         0x91a8, 0xffffffff, 0x00080007,
994         0x91ac, 0xffffffff, 0x000b000a,
995         0x91b0, 0xffffffff, 0x00050004,
996         0x91b4, 0xffffffff, 0x00070006,
997         0x91b8, 0xffffffff, 0x0008000b,
998         0x91bc, 0xffffffff, 0x000a0009,
999         0x91c0, 0xffffffff, 0x000d000c,
1000         0x91c4, 0xffffffff, 0x00060005,
1001         0x91c8, 0xffffffff, 0x00080007,
1002         0x91cc, 0xffffffff, 0x0000000b,
1003         0x91d0, 0xffffffff, 0x000a0009,
1004         0x91d4, 0xffffffff, 0x000d000c,
1005         0x9150, 0xffffffff, 0x96940200,
1006         0x8708, 0xffffffff, 0x00900100,
1007         0xc478, 0xffffffff, 0x00000080,
1008         0xc404, 0xffffffff, 0x0020003f,
1009         0x30, 0xffffffff, 0x0000001c,
1010         0x34, 0x000f0000, 0x000f0000,
1011         0x160c, 0xffffffff, 0x00000100,
1012         0x1024, 0xffffffff, 0x00000100,
1013         0x20a8, 0xffffffff, 0x00000104,
1014         0x264c, 0x000c0000, 0x000c0000,
1015         0x2648, 0x000c0000, 0x000c0000,
1016         0x2f50, 0x00000001, 0x00000001,
1017         0x30cc, 0xc0000fff, 0x00000104,
1018         0xc1e4, 0x00000001, 0x00000001,
1019         0xd0c0, 0xfffffff0, 0x00000100,
1020         0xd8c0, 0xfffffff0, 0x00000100
1021 };
1022
1023 static const u32 verde_pg_init[] =
1024 {
1025         0x353c, 0xffffffff, 0x40000,
1026         0x3538, 0xffffffff, 0x200010ff,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x7007,
1033         0x3538, 0xffffffff, 0x300010ff,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x400000,
1040         0x3538, 0xffffffff, 0x100010ff,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x120200,
1047         0x3538, 0xffffffff, 0x500010ff,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x1e1e16,
1054         0x3538, 0xffffffff, 0x600010ff,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x171f1e,
1061         0x3538, 0xffffffff, 0x700010ff,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x353c, 0xffffffff, 0x0,
1068         0x3538, 0xffffffff, 0x9ff,
1069         0x3500, 0xffffffff, 0x0,
1070         0x3504, 0xffffffff, 0x10000800,
1071         0x3504, 0xffffffff, 0xf,
1072         0x3504, 0xffffffff, 0xf,
1073         0x3500, 0xffffffff, 0x4,
1074         0x3504, 0xffffffff, 0x1000051e,
1075         0x3504, 0xffffffff, 0xffff,
1076         0x3504, 0xffffffff, 0xffff,
1077         0x3500, 0xffffffff, 0x8,
1078         0x3504, 0xffffffff, 0x80500,
1079         0x3500, 0xffffffff, 0x12,
1080         0x3504, 0xffffffff, 0x9050c,
1081         0x3500, 0xffffffff, 0x1d,
1082         0x3504, 0xffffffff, 0xb052c,
1083         0x3500, 0xffffffff, 0x2a,
1084         0x3504, 0xffffffff, 0x1053e,
1085         0x3500, 0xffffffff, 0x2d,
1086         0x3504, 0xffffffff, 0x10546,
1087         0x3500, 0xffffffff, 0x30,
1088         0x3504, 0xffffffff, 0xa054e,
1089         0x3500, 0xffffffff, 0x3c,
1090         0x3504, 0xffffffff, 0x1055f,
1091         0x3500, 0xffffffff, 0x3f,
1092         0x3504, 0xffffffff, 0x10567,
1093         0x3500, 0xffffffff, 0x42,
1094         0x3504, 0xffffffff, 0x1056f,
1095         0x3500, 0xffffffff, 0x45,
1096         0x3504, 0xffffffff, 0x10572,
1097         0x3500, 0xffffffff, 0x48,
1098         0x3504, 0xffffffff, 0x20575,
1099         0x3500, 0xffffffff, 0x4c,
1100         0x3504, 0xffffffff, 0x190801,
1101         0x3500, 0xffffffff, 0x67,
1102         0x3504, 0xffffffff, 0x1082a,
1103         0x3500, 0xffffffff, 0x6a,
1104         0x3504, 0xffffffff, 0x1b082d,
1105         0x3500, 0xffffffff, 0x87,
1106         0x3504, 0xffffffff, 0x310851,
1107         0x3500, 0xffffffff, 0xba,
1108         0x3504, 0xffffffff, 0x891,
1109         0x3500, 0xffffffff, 0xbc,
1110         0x3504, 0xffffffff, 0x893,
1111         0x3500, 0xffffffff, 0xbe,
1112         0x3504, 0xffffffff, 0x20895,
1113         0x3500, 0xffffffff, 0xc2,
1114         0x3504, 0xffffffff, 0x20899,
1115         0x3500, 0xffffffff, 0xc6,
1116         0x3504, 0xffffffff, 0x2089d,
1117         0x3500, 0xffffffff, 0xca,
1118         0x3504, 0xffffffff, 0x8a1,
1119         0x3500, 0xffffffff, 0xcc,
1120         0x3504, 0xffffffff, 0x8a3,
1121         0x3500, 0xffffffff, 0xce,
1122         0x3504, 0xffffffff, 0x308a5,
1123         0x3500, 0xffffffff, 0xd3,
1124         0x3504, 0xffffffff, 0x6d08cd,
1125         0x3500, 0xffffffff, 0x142,
1126         0x3504, 0xffffffff, 0x2000095a,
1127         0x3504, 0xffffffff, 0x1,
1128         0x3500, 0xffffffff, 0x144,
1129         0x3504, 0xffffffff, 0x301f095b,
1130         0x3500, 0xffffffff, 0x165,
1131         0x3504, 0xffffffff, 0xc094d,
1132         0x3500, 0xffffffff, 0x173,
1133         0x3504, 0xffffffff, 0xf096d,
1134         0x3500, 0xffffffff, 0x184,
1135         0x3504, 0xffffffff, 0x15097f,
1136         0x3500, 0xffffffff, 0x19b,
1137         0x3504, 0xffffffff, 0xc0998,
1138         0x3500, 0xffffffff, 0x1a9,
1139         0x3504, 0xffffffff, 0x409a7,
1140         0x3500, 0xffffffff, 0x1af,
1141         0x3504, 0xffffffff, 0xcdc,
1142         0x3500, 0xffffffff, 0x1b1,
1143         0x3504, 0xffffffff, 0x800,
1144         0x3508, 0xffffffff, 0x6c9b2000,
1145         0x3510, 0xfc00, 0x2000,
1146         0x3544, 0xffffffff, 0xfc0,
1147         0x28d4, 0x00000100, 0x100
1148 };
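
/*
 * verde_pg_init above is the Verde powergating init sequence; like the
 * golden register tables, it is applied through
 * radeon_program_register_sequence() from si_init_golden_registers()
 * below.
 */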
1149
1150 static void si_init_golden_registers(struct radeon_device *rdev)
1151 {
1152         switch (rdev->family) {
1153         case CHIP_TAHITI:
1154                 radeon_program_register_sequence(rdev,
1155                                                  tahiti_golden_registers,
1156                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1157                 radeon_program_register_sequence(rdev,
1158                                                  tahiti_golden_rlc_registers,
1159                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1160                 radeon_program_register_sequence(rdev,
1161                                                  tahiti_mgcg_cgcg_init,
1162                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1163                 radeon_program_register_sequence(rdev,
1164                                                  tahiti_golden_registers2,
1165                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1166                 break;
1167         case CHIP_PITCAIRN:
1168                 radeon_program_register_sequence(rdev,
1169                                                  pitcairn_golden_registers,
1170                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1171                 radeon_program_register_sequence(rdev,
1172                                                  pitcairn_golden_rlc_registers,
1173                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1174                 radeon_program_register_sequence(rdev,
1175                                                  pitcairn_mgcg_cgcg_init,
1176                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1177                 break;
1178         case CHIP_VERDE:
1179                 radeon_program_register_sequence(rdev,
1180                                                  verde_golden_registers,
1181                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1182                 radeon_program_register_sequence(rdev,
1183                                                  verde_golden_rlc_registers,
1184                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1185                 radeon_program_register_sequence(rdev,
1186                                                  verde_mgcg_cgcg_init,
1187                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1188                 radeon_program_register_sequence(rdev,
1189                                                  verde_pg_init,
1190                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1191                 break;
1192         case CHIP_OLAND:
1193                 radeon_program_register_sequence(rdev,
1194                                                  oland_golden_registers,
1195                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1196                 radeon_program_register_sequence(rdev,
1197                                                  oland_golden_rlc_registers,
1198                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1199                 radeon_program_register_sequence(rdev,
1200                                                  oland_mgcg_cgcg_init,
1201                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1202                 break;
1203         case CHIP_HAINAN:
1204                 radeon_program_register_sequence(rdev,
1205                                                  hainan_golden_registers,
1206                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1207                 radeon_program_register_sequence(rdev,
1208                                                  hainan_golden_registers2,
1209                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1210                 radeon_program_register_sequence(rdev,
1211                                                  hainan_mgcg_cgcg_init,
1212                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1213                 break;
1214         default:
1215                 break;
1216         }
1217 }
1218
1219 #define PCIE_BUS_CLK                10000
1220 #define TCLK                        (PCIE_BUS_CLK / 10)
1221
1222 /**
1223  * si_get_xclk - get the xclk
1224  *
1225  * @rdev: radeon_device pointer
1226  *
1227  * Returns the reference clock used by the gfx engine
1228  * (SI).
1229  */
1230 u32 si_get_xclk(struct radeon_device *rdev)
1231 {
1232         u32 reference_clock = rdev->clock.spll.reference_freq;
1233         u32 tmp;
1234
1235         tmp = RREG32(CG_CLKPIN_CNTL_2);
1236         if (tmp & MUX_TCLK_TO_XCLK)
1237                 return TCLK;
1238
1239         tmp = RREG32(CG_CLKPIN_CNTL);
1240         if (tmp & XTALIN_DIVIDE)
1241                 return reference_clock / 4;
1242
1243         return reference_clock;
1244 }
1245
1246 /* get temperature in millidegrees */
1247 int si_get_temp(struct radeon_device *rdev)
1248 {
1249         u32 temp;
1250         int actual_temp = 0;
1251
1252         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1253                 CTF_TEMP_SHIFT;
1254
1255         if (temp & 0x200)
1256                 actual_temp = 255;
1257         else
1258                 actual_temp = temp & 0x1ff;
1259
1260         actual_temp = (actual_temp * 1000);
1261
1262         return actual_temp;
1263 }
1264
1265 #define TAHITI_IO_MC_REGS_SIZE 36
1266
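/*
 * MC (memory controller) I/O register tables: each row is an
 * { index, value } pair which si_mc_load_microcode() programs into the
 * MC sequencer (presumably via the MC_SEQ_IO_DEBUG index/data
 * registers in the remainder of that function) before uploading the
 * MC firmware image.
 */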
1267 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1268         {0x0000006f, 0x03044000},
1269         {0x00000070, 0x0480c018},
1270         {0x00000071, 0x00000040},
1271         {0x00000072, 0x01000000},
1272         {0x00000074, 0x000000ff},
1273         {0x00000075, 0x00143400},
1274         {0x00000076, 0x08ec0800},
1275         {0x00000077, 0x040000cc},
1276         {0x00000079, 0x00000000},
1277         {0x0000007a, 0x21000409},
1278         {0x0000007c, 0x00000000},
1279         {0x0000007d, 0xe8000000},
1280         {0x0000007e, 0x044408a8},
1281         {0x0000007f, 0x00000003},
1282         {0x00000080, 0x00000000},
1283         {0x00000081, 0x01000000},
1284         {0x00000082, 0x02000000},
1285         {0x00000083, 0x00000000},
1286         {0x00000084, 0xe3f3e4f4},
1287         {0x00000085, 0x00052024},
1288         {0x00000087, 0x00000000},
1289         {0x00000088, 0x66036603},
1290         {0x00000089, 0x01000000},
1291         {0x0000008b, 0x1c0a0000},
1292         {0x0000008c, 0xff010000},
1293         {0x0000008e, 0xffffefff},
1294         {0x0000008f, 0xfff3efff},
1295         {0x00000090, 0xfff3efbf},
1296         {0x00000094, 0x00101101},
1297         {0x00000095, 0x00000fff},
1298         {0x00000096, 0x00116fff},
1299         {0x00000097, 0x60010000},
1300         {0x00000098, 0x10010000},
1301         {0x00000099, 0x00006000},
1302         {0x0000009a, 0x00001000},
1303         {0x0000009f, 0x00a77400}
1304 };
1305
1306 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1307         {0x0000006f, 0x03044000},
1308         {0x00000070, 0x0480c018},
1309         {0x00000071, 0x00000040},
1310         {0x00000072, 0x01000000},
1311         {0x00000074, 0x000000ff},
1312         {0x00000075, 0x00143400},
1313         {0x00000076, 0x08ec0800},
1314         {0x00000077, 0x040000cc},
1315         {0x00000079, 0x00000000},
1316         {0x0000007a, 0x21000409},
1317         {0x0000007c, 0x00000000},
1318         {0x0000007d, 0xe8000000},
1319         {0x0000007e, 0x044408a8},
1320         {0x0000007f, 0x00000003},
1321         {0x00000080, 0x00000000},
1322         {0x00000081, 0x01000000},
1323         {0x00000082, 0x02000000},
1324         {0x00000083, 0x00000000},
1325         {0x00000084, 0xe3f3e4f4},
1326         {0x00000085, 0x00052024},
1327         {0x00000087, 0x00000000},
1328         {0x00000088, 0x66036603},
1329         {0x00000089, 0x01000000},
1330         {0x0000008b, 0x1c0a0000},
1331         {0x0000008c, 0xff010000},
1332         {0x0000008e, 0xffffefff},
1333         {0x0000008f, 0xfff3efff},
1334         {0x00000090, 0xfff3efbf},
1335         {0x00000094, 0x00101101},
1336         {0x00000095, 0x00000fff},
1337         {0x00000096, 0x00116fff},
1338         {0x00000097, 0x60010000},
1339         {0x00000098, 0x10010000},
1340         {0x00000099, 0x00006000},
1341         {0x0000009a, 0x00001000},
1342         {0x0000009f, 0x00a47400}
1343 };
1344
1345 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1346         {0x0000006f, 0x03044000},
1347         {0x00000070, 0x0480c018},
1348         {0x00000071, 0x00000040},
1349         {0x00000072, 0x01000000},
1350         {0x00000074, 0x000000ff},
1351         {0x00000075, 0x00143400},
1352         {0x00000076, 0x08ec0800},
1353         {0x00000077, 0x040000cc},
1354         {0x00000079, 0x00000000},
1355         {0x0000007a, 0x21000409},
1356         {0x0000007c, 0x00000000},
1357         {0x0000007d, 0xe8000000},
1358         {0x0000007e, 0x044408a8},
1359         {0x0000007f, 0x00000003},
1360         {0x00000080, 0x00000000},
1361         {0x00000081, 0x01000000},
1362         {0x00000082, 0x02000000},
1363         {0x00000083, 0x00000000},
1364         {0x00000084, 0xe3f3e4f4},
1365         {0x00000085, 0x00052024},
1366         {0x00000087, 0x00000000},
1367         {0x00000088, 0x66036603},
1368         {0x00000089, 0x01000000},
1369         {0x0000008b, 0x1c0a0000},
1370         {0x0000008c, 0xff010000},
1371         {0x0000008e, 0xffffefff},
1372         {0x0000008f, 0xfff3efff},
1373         {0x00000090, 0xfff3efbf},
1374         {0x00000094, 0x00101101},
1375         {0x00000095, 0x00000fff},
1376         {0x00000096, 0x00116fff},
1377         {0x00000097, 0x60010000},
1378         {0x00000098, 0x10010000},
1379         {0x00000099, 0x00006000},
1380         {0x0000009a, 0x00001000},
1381         {0x0000009f, 0x00a37400}
1382 };
1383
1384 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1385         {0x0000006f, 0x03044000},
1386         {0x00000070, 0x0480c018},
1387         {0x00000071, 0x00000040},
1388         {0x00000072, 0x01000000},
1389         {0x00000074, 0x000000ff},
1390         {0x00000075, 0x00143400},
1391         {0x00000076, 0x08ec0800},
1392         {0x00000077, 0x040000cc},
1393         {0x00000079, 0x00000000},
1394         {0x0000007a, 0x21000409},
1395         {0x0000007c, 0x00000000},
1396         {0x0000007d, 0xe8000000},
1397         {0x0000007e, 0x044408a8},
1398         {0x0000007f, 0x00000003},
1399         {0x00000080, 0x00000000},
1400         {0x00000081, 0x01000000},
1401         {0x00000082, 0x02000000},
1402         {0x00000083, 0x00000000},
1403         {0x00000084, 0xe3f3e4f4},
1404         {0x00000085, 0x00052024},
1405         {0x00000087, 0x00000000},
1406         {0x00000088, 0x66036603},
1407         {0x00000089, 0x01000000},
1408         {0x0000008b, 0x1c0a0000},
1409         {0x0000008c, 0xff010000},
1410         {0x0000008e, 0xffffefff},
1411         {0x0000008f, 0xfff3efff},
1412         {0x00000090, 0xfff3efbf},
1413         {0x00000094, 0x00101101},
1414         {0x00000095, 0x00000fff},
1415         {0x00000096, 0x00116fff},
1416         {0x00000097, 0x60010000},
1417         {0x00000098, 0x10010000},
1418         {0x00000099, 0x00006000},
1419         {0x0000009a, 0x00001000},
1420         {0x0000009f, 0x00a17730}
1421 };
1422
1423 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1424         {0x0000006f, 0x03044000},
1425         {0x00000070, 0x0480c018},
1426         {0x00000071, 0x00000040},
1427         {0x00000072, 0x01000000},
1428         {0x00000074, 0x000000ff},
1429         {0x00000075, 0x00143400},
1430         {0x00000076, 0x08ec0800},
1431         {0x00000077, 0x040000cc},
1432         {0x00000079, 0x00000000},
1433         {0x0000007a, 0x21000409},
1434         {0x0000007c, 0x00000000},
1435         {0x0000007d, 0xe8000000},
1436         {0x0000007e, 0x044408a8},
1437         {0x0000007f, 0x00000003},
1438         {0x00000080, 0x00000000},
1439         {0x00000081, 0x01000000},
1440         {0x00000082, 0x02000000},
1441         {0x00000083, 0x00000000},
1442         {0x00000084, 0xe3f3e4f4},
1443         {0x00000085, 0x00052024},
1444         {0x00000087, 0x00000000},
1445         {0x00000088, 0x66036603},
1446         {0x00000089, 0x01000000},
1447         {0x0000008b, 0x1c0a0000},
1448         {0x0000008c, 0xff010000},
1449         {0x0000008e, 0xffffefff},
1450         {0x0000008f, 0xfff3efff},
1451         {0x00000090, 0xfff3efbf},
1452         {0x00000094, 0x00101101},
1453         {0x00000095, 0x00000fff},
1454         {0x00000096, 0x00116fff},
1455         {0x00000097, 0x60010000},
1456         {0x00000098, 0x10010000},
1457         {0x00000099, 0x00006000},
1458         {0x0000009a, 0x00001000},
1459         {0x0000009f, 0x00a07730}
1460 };
1461
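/*
 * The *_io_mc_regs tables above are {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs: si_mc_load_microcode() below walks the per-asic table and programs
 * each index/value pair into the memory controller before streaming in the
 * MC ucode.
 */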
1462 /* ucode loading */
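/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * If the MC sequencer is not already running, stop it, program the
 * per-asic io debug registers, stream the big-endian MC ucode words
 * into MC_SEQ_SUP_PGM, restart the engine and wait for memory training
 * to complete on both channels.  Returns 0 on success, -EINVAL if no
 * MC firmware has been loaded.
 */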
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465         const __be32 *fw_data;
1466         u32 running, blackout = 0;
1467         u32 *io_mc_regs;
1468         int i, ucode_size, regs_size;
1469
1470         if (!rdev->mc_fw)
1471                 return -EINVAL;
1472
1473         switch (rdev->family) {
1474         case CHIP_TAHITI:
1475                 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476                 ucode_size = SI_MC_UCODE_SIZE;
1477                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1478                 break;
1479         case CHIP_PITCAIRN:
1480                 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481                 ucode_size = SI_MC_UCODE_SIZE;
1482                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1483                 break;
1484         case CHIP_VERDE:
1485         default:
1486                 io_mc_regs = (u32 *)&verde_io_mc_regs;
1487                 ucode_size = SI_MC_UCODE_SIZE;
1488                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1489                 break;
1490         case CHIP_OLAND:
1491                 io_mc_regs = (u32 *)&oland_io_mc_regs;
1492                 ucode_size = OLAND_MC_UCODE_SIZE;
1493                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1494                 break;
1495         case CHIP_HAINAN:
1496                 io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497                 ucode_size = OLAND_MC_UCODE_SIZE;
1498                 regs_size = TAHITI_IO_MC_REGS_SIZE;
1499                 break;
1500         }
1501
1502         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503
1504         if (running == 0) {
1505                 if (running) {
1506                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1507                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1508                 }
1509
1510                 /* reset the engine and set to writable */
1511                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513
1514                 /* load mc io regs */
1515                 for (i = 0; i < regs_size; i++) {
1516                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518                 }
1519                 /* load the MC ucode */
1520                 fw_data = (const __be32 *)rdev->mc_fw->data;
1521                 for (i = 0; i < ucode_size; i++)
1522                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523
1524                 /* put the engine back into the active state */
1525                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528
1529                 /* wait for training to complete */
1530                 for (i = 0; i < rdev->usec_timeout; i++) {
1531                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532                                 break;
1533                         udelay(1);
1534                 }
1535                 for (i = 0; i < rdev->usec_timeout; i++) {
1536                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537                                 break;
1538                         udelay(1);
1539                 }
1540
1541                 if (running)
1542                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1543         }
1544
1545         return 0;
1546 }
1547
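/**
 * si_init_microcode - fetch the required firmware images
 *
 * @rdev: radeon_device pointer
 *
 * Requests the PFP, ME, CE, RLC, MC and SMC firmware images via
 * request_firmware() and validates their sizes.  A missing SMC image
 * is treated as non-fatal (the pointer is simply cleared); any other
 * failure releases every image that was loaded and returns an error.
 */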
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550         const char *chip_name;
1551         const char *rlc_chip_name;
1552         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553         size_t smc_req_size;
1554         char fw_name[30];
1555         int err;
1556
1557         DRM_DEBUG("\n");
1558
1559         switch (rdev->family) {
1560         case CHIP_TAHITI:
1561                 chip_name = "TAHITI";
1562                 rlc_chip_name = "TAHITI";
1563                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1565                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1566                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1568                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569                 break;
1570         case CHIP_PITCAIRN:
1571                 chip_name = "PITCAIRN";
1572                 rlc_chip_name = "PITCAIRN";
1573                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1575                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1576                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1578                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579                 break;
1580         case CHIP_VERDE:
1581                 chip_name = "VERDE";
1582                 rlc_chip_name = "VERDE";
1583                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1585                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1586                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1588                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589                 break;
1590         case CHIP_OLAND:
1591                 chip_name = "OLAND";
1592                 rlc_chip_name = "OLAND";
1593                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1595                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1596                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599                 break;
1600         case CHIP_HAINAN:
1601                 chip_name = "HAINAN";
1602                 rlc_chip_name = "HAINAN";
1603                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1605                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1606                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607                 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609                 break;
1610         default: BUG();
1611         }
1612
1613         DRM_INFO("Loading %s Microcode\n", chip_name);
1614
1615         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617         if (err)
1618                 goto out;
1619         if (rdev->pfp_fw->size != pfp_req_size) {
1620                 printk(KERN_ERR
1621                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622                        rdev->pfp_fw->size, fw_name);
1623                 err = -EINVAL;
1624                 goto out;
1625         }
1626
1627         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629         if (err)
1630                 goto out;
1631         if (rdev->me_fw->size != me_req_size) {
1632                 printk(KERN_ERR
1633                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634                        rdev->me_fw->size, fw_name);
1635                 err = -EINVAL;
1636         }
1637
1638         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640         if (err)
1641                 goto out;
1642         if (rdev->ce_fw->size != ce_req_size) {
1643                 printk(KERN_ERR
1644                        "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645                        rdev->ce_fw->size, fw_name);
1646                 err = -EINVAL;
1647         }
1648
1649         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651         if (err)
1652                 goto out;
1653         if (rdev->rlc_fw->size != rlc_req_size) {
1654                 printk(KERN_ERR
1655                        "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656                        rdev->rlc_fw->size, fw_name);
1657                 err = -EINVAL;
1658         }
1659
1660         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662         if (err)
1663                 goto out;
1664         if (rdev->mc_fw->size != mc_req_size) {
1665                 printk(KERN_ERR
1666                        "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667                        rdev->mc_fw->size, fw_name);
1668                 err = -EINVAL;
1669         }
1670
1671         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673         if (err) {
1674                 printk(KERN_ERR
1675                        "smc: error loading firmware \"%s\"\n",
1676                        fw_name);
1677                 release_firmware(rdev->smc_fw);
1678                 rdev->smc_fw = NULL;
1679                 err = 0;
1680         } else if (rdev->smc_fw->size != smc_req_size) {
1681                 printk(KERN_ERR
1682                        "si_smc: Bogus length %zu in firmware \"%s\"\n",
1683                        rdev->smc_fw->size, fw_name);
1684                 err = -EINVAL;
1685         }
1686
1687 out:
1688         if (err) {
1689                 if (err != -EINVAL)
1690                         printk(KERN_ERR
1691                                "si_cp: Failed to load firmware \"%s\"\n",
1692                                fw_name);
1693                 release_firmware(rdev->pfp_fw);
1694                 rdev->pfp_fw = NULL;
1695                 release_firmware(rdev->me_fw);
1696                 rdev->me_fw = NULL;
1697                 release_firmware(rdev->ce_fw);
1698                 rdev->ce_fw = NULL;
1699                 release_firmware(rdev->rlc_fw);
1700                 rdev->rlc_fw = NULL;
1701                 release_firmware(rdev->mc_fw);
1702                 rdev->mc_fw = NULL;
1703                 release_firmware(rdev->smc_fw);
1704                 rdev->smc_fw = NULL;
1705         }
1706         return err;
1707 }
1708
1709 /* watermark setup */
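/**
 * dce6_line_buffer_adjust - set up the line buffer for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display controller
 * @other_mode: the display mode of the paired display controller
 *
 * Programs DC_LB_MEMORY_SPLIT (half or whole line buffer depending on
 * whether the paired crtc is active) and waits for the DMIF buffer
 * allocation to complete.  Returns the amount of line buffer allocated
 * to this display controller, or 0 if it is disabled.
 */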
1710 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1711                                    struct radeon_crtc *radeon_crtc,
1712                                    struct drm_display_mode *mode,
1713                                    struct drm_display_mode *other_mode)
1714 {
1715         u32 tmp, buffer_alloc, i;
1716         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1717         /*
1718          * Line Buffer Setup
1719          * There are 3 line buffers, each one shared by 2 display controllers.
1720          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1721          * the display controllers.  The partitioning is done via one of four
1722          * preset allocations specified in bits 21:20:
1723          *  0 - half lb
1724          *  2 - whole lb, other crtc must be disabled
1725          */
1726         /* this can get tricky if we have two large displays on a paired group
1727          * of crtcs.  Ideally for multiple large displays we'd assign them to
1728          * non-linked crtcs for maximum line buffer allocation.
1729          */
1730         if (radeon_crtc->base.enabled && mode) {
1731                 if (other_mode) {
1732                         tmp = 0; /* 1/2 */
1733                         buffer_alloc = 1;
1734                 } else {
1735                         tmp = 2; /* whole */
1736                         buffer_alloc = 2;
1737                 }
1738         } else {
1739                 tmp = 0;
1740                 buffer_alloc = 0;
1741         }
1742
1743         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1744                DC_LB_MEMORY_CONFIG(tmp));
1745
1746         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1747                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1748         for (i = 0; i < rdev->usec_timeout; i++) {
1749                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1750                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1751                         break;
1752                 udelay(1);
1753         }
1754
1755         if (radeon_crtc->base.enabled && mode) {
1756                 switch (tmp) {
1757                 case 0:
1758                 default:
1759                         return 4096 * 2;
1760                 case 2:
1761                         return 8192 * 2;
1762                 }
1763         }
1764
1765         /* controller not enabled, so no lb used */
1766         return 0;
1767 }
1768
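/* Decode the NOOFCHAN field of MC_SHARED_CHMAP into the number of DRAM channels. */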
1769 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1770 {
1771         u32 tmp = RREG32(MC_SHARED_CHMAP);
1772
1773         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1774         case 0:
1775         default:
1776                 return 1;
1777         case 1:
1778                 return 2;
1779         case 2:
1780                 return 4;
1781         case 3:
1782                 return 8;
1783         case 4:
1784                 return 3;
1785         case 5:
1786                 return 6;
1787         case 6:
1788                 return 10;
1789         case 7:
1790                 return 12;
1791         case 8:
1792                 return 16;
1793         }
1794 }
1795
1796 struct dce6_wm_params {
1797         u32 dram_channels; /* number of dram channels */
1798         u32 yclk;          /* bandwidth per dram data pin in kHz */
1799         u32 sclk;          /* engine clock in kHz */
1800         u32 disp_clk;      /* display clock in kHz */
1801         u32 src_width;     /* viewport width */
1802         u32 active_time;   /* active display time in ns */
1803         u32 blank_time;    /* blank time in ns */
1804         bool interlaced;    /* mode is interlaced */
1805         fixed20_12 vsc;    /* vertical scale ratio */
1806         u32 num_heads;     /* number of active crtcs */
1807         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1808         u32 lb_size;       /* line buffer allocated to pipe */
1809         u32 vtaps;         /* vertical scaler taps */
1810 };
1811
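/*
 * Raw DRAM bandwidth, roughly dram_channels * 4 bytes * (yclk / 1000) * 0.7
 * efficiency, i.e. MB/s given yclk in kHz as noted in dce6_wm_params.
 */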
1812 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1813 {
1814         /* Calculate raw DRAM Bandwidth */
1815         fixed20_12 dram_efficiency; /* 0.7 */
1816         fixed20_12 yclk, dram_channels, bandwidth;
1817         fixed20_12 a;
1818
1819         a.full = dfixed_const(1000);
1820         yclk.full = dfixed_const(wm->yclk);
1821         yclk.full = dfixed_div(yclk, a);
1822         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1823         a.full = dfixed_const(10);
1824         dram_efficiency.full = dfixed_const(7);
1825         dram_efficiency.full = dfixed_div(dram_efficiency, a);
1826         bandwidth.full = dfixed_mul(dram_channels, yclk);
1827         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1828
1829         return dfixed_trunc(bandwidth);
1830 }
1831
1832 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1833 {
1834         /* Calculate DRAM Bandwidth and the part allocated to display. */
1835         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1836         fixed20_12 yclk, dram_channels, bandwidth;
1837         fixed20_12 a;
1838
1839         a.full = dfixed_const(1000);
1840         yclk.full = dfixed_const(wm->yclk);
1841         yclk.full = dfixed_div(yclk, a);
1842         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1843         a.full = dfixed_const(10);
1844         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
1845         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1846         bandwidth.full = dfixed_mul(dram_channels, yclk);
1847         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1848
1849         return dfixed_trunc(bandwidth);
1850 }
1851
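/*
 * Display data return bandwidth, roughly 32 bytes * (sclk / 1000) * 0.8
 * return efficiency (MB/s for sclk in kHz).
 */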
1852 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1853 {
1854         /* Calculate the display Data return Bandwidth */
1855         fixed20_12 return_efficiency; /* 0.8 */
1856         fixed20_12 sclk, bandwidth;
1857         fixed20_12 a;
1858
1859         a.full = dfixed_const(1000);
1860         sclk.full = dfixed_const(wm->sclk);
1861         sclk.full = dfixed_div(sclk, a);
1862         a.full = dfixed_const(10);
1863         return_efficiency.full = dfixed_const(8);
1864         return_efficiency.full = dfixed_div(return_efficiency, a);
1865         a.full = dfixed_const(32);
1866         bandwidth.full = dfixed_mul(a, sclk);
1867         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1868
1869         return dfixed_trunc(bandwidth);
1870 }
1871
1872 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1873 {
1874         return 32;
1875 }
1876
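/*
 * DMIF request bandwidth, roughly min(16 * disp_clk, 32 * sclk) / 1000 * 0.8
 * request efficiency (MB/s for clocks in kHz), i.e. whichever of the display
 * clock or engine clock request paths is the tighter limit.
 */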
1877 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1878 {
1879         /* Calculate the DMIF Request Bandwidth */
1880         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1881         fixed20_12 disp_clk, sclk, bandwidth;
1882         fixed20_12 a, b1, b2;
1883         u32 min_bandwidth;
1884
1885         a.full = dfixed_const(1000);
1886         disp_clk.full = dfixed_const(wm->disp_clk);
1887         disp_clk.full = dfixed_div(disp_clk, a);
1888         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1889         b1.full = dfixed_mul(a, disp_clk);
1890
1891         a.full = dfixed_const(1000);
1892         sclk.full = dfixed_const(wm->sclk);
1893         sclk.full = dfixed_div(sclk, a);
1894         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1895         b2.full = dfixed_mul(a, sclk);
1896
1897         a.full = dfixed_const(10);
1898         disp_clk_request_efficiency.full = dfixed_const(8);
1899         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1900
1901         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1902
1903         a.full = dfixed_const(min_bandwidth);
1904         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1905
1906         return dfixed_trunc(bandwidth);
1907 }
1908
1909 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1910 {
1911         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1912         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1913         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1914         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1915
1916         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1917 }
1918
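/*
 * Average bandwidth consumed by the mode itself:
 * src_width * bytes_per_pixel * vsc / (line_time / 1000), i.e. the bytes
 * fetched per line divided by the line period in us.
 */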
1919 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1920 {
1921         /* Calculate the display mode Average Bandwidth
1922          * DisplayMode should contain the source and destination dimensions,
1923          * timing, etc.
1924          */
1925         fixed20_12 bpp;
1926         fixed20_12 line_time;
1927         fixed20_12 src_width;
1928         fixed20_12 bandwidth;
1929         fixed20_12 a;
1930
1931         a.full = dfixed_const(1000);
1932         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1933         line_time.full = dfixed_div(line_time, a);
1934         bpp.full = dfixed_const(wm->bytes_per_pixel);
1935         src_width.full = dfixed_const(wm->src_width);
1936         bandwidth.full = dfixed_mul(src_width, bpp);
1937         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1938         bandwidth.full = dfixed_div(bandwidth, line_time);
1939
1940         return dfixed_trunc(bandwidth);
1941 }
1942
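/*
 * Worst case latency (in ns) that the line buffer has to hide: memory
 * latency plus the data return time of the other heads plus the display
 * pipe latency, extended if filling a line takes longer than the active
 * display time.
 */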
1943 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1944 {
1945         /* First calculate the latency in ns */
1946         u32 mc_latency = 2000; /* 2000 ns. */
1947         u32 available_bandwidth = dce6_available_bandwidth(wm);
1948         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1949         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1950         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1951         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1952                 (wm->num_heads * cursor_line_pair_return_time);
1953         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1954         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1955         u32 tmp, dmif_size = 12288;
1956         fixed20_12 a, b, c;
1957
1958         if (wm->num_heads == 0)
1959                 return 0;
1960
1961         a.full = dfixed_const(2);
1962         b.full = dfixed_const(1);
1963         if ((wm->vsc.full > a.full) ||
1964             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1965             (wm->vtaps >= 5) ||
1966             ((wm->vsc.full >= a.full) && wm->interlaced))
1967                 max_src_lines_per_dst_line = 4;
1968         else
1969                 max_src_lines_per_dst_line = 2;
1970
1971         a.full = dfixed_const(available_bandwidth);
1972         b.full = dfixed_const(wm->num_heads);
1973         a.full = dfixed_div(a, b);
1974
1975         b.full = dfixed_const(mc_latency + 512);
1976         c.full = dfixed_const(wm->disp_clk);
1977         b.full = dfixed_div(b, c);
1978
1979         c.full = dfixed_const(dmif_size);
1980         b.full = dfixed_div(c, b);
1981
1982         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1983
1984         b.full = dfixed_const(1000);
1985         c.full = dfixed_const(wm->disp_clk);
1986         b.full = dfixed_div(c, b);
1987         c.full = dfixed_const(wm->bytes_per_pixel);
1988         b.full = dfixed_mul(b, c);
1989
1990         lb_fill_bw = min(tmp, dfixed_trunc(b));
1991
1992         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1993         b.full = dfixed_const(1000);
1994         c.full = dfixed_const(lb_fill_bw);
1995         b.full = dfixed_div(c, b);
1996         a.full = dfixed_div(a, b);
1997         line_fill_time = dfixed_trunc(a);
1998
1999         if (line_fill_time < wm->active_time)
2000                 return latency;
2001         else
2002                 return latency + (line_fill_time - wm->active_time);
2003
2004 }
2005
2006 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2007 {
2008         if (dce6_average_bandwidth(wm) <=
2009             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2010                 return true;
2011         else
2012                 return false;
2013 }
2014
2015 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2016 {
2017         if (dce6_average_bandwidth(wm) <=
2018             (dce6_available_bandwidth(wm) / wm->num_heads))
2019                 return true;
2020         else
2021                 return false;
2022 }
2023
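/*
 * Returns true if the time covered by the lines buffered in the allocated
 * line buffer (plus blanking) is enough to hide the latency watermark
 * computed above.
 */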
2024 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2025 {
2026         u32 lb_partitions = wm->lb_size / wm->src_width;
2027         u32 line_time = wm->active_time + wm->blank_time;
2028         u32 latency_tolerant_lines;
2029         u32 latency_hiding;
2030         fixed20_12 a;
2031
2032         a.full = dfixed_const(1);
2033         if (wm->vsc.full > a.full)
2034                 latency_tolerant_lines = 1;
2035         else {
2036                 if (lb_partitions <= (wm->vtaps + 1))
2037                         latency_tolerant_lines = 1;
2038                 else
2039                         latency_tolerant_lines = 2;
2040         }
2041
2042         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2043
2044         if (dce6_latency_watermark(wm) <= latency_hiding)
2045                 return true;
2046         else
2047                 return false;
2048 }
2049
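/**
 * dce6_program_watermarks - program display watermarks for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size allocated to this controller
 * @num_heads: number of active display controllers
 *
 * Builds a high-clock and a low-clock set of watermark parameters for the
 * current mode, forces display priority high if any bandwidth check fails,
 * and programs the latency watermarks and priority marks for this pipe.
 * The results are also cached in the crtc for later use by DPM.
 */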
2050 static void dce6_program_watermarks(struct radeon_device *rdev,
2051                                          struct radeon_crtc *radeon_crtc,
2052                                          u32 lb_size, u32 num_heads)
2053 {
2054         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2055         struct dce6_wm_params wm_low, wm_high;
2056         u32 dram_channels;
2057         u32 pixel_period;
2058         u32 line_time = 0;
2059         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2060         u32 priority_a_mark = 0, priority_b_mark = 0;
2061         u32 priority_a_cnt = PRIORITY_OFF;
2062         u32 priority_b_cnt = PRIORITY_OFF;
2063         u32 tmp, arb_control3;
2064         fixed20_12 a, b, c;
2065
2066         if (radeon_crtc->base.enabled && num_heads && mode) {
2067                 pixel_period = 1000000 / (u32)mode->clock;
2068                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2069                 priority_a_cnt = 0;
2070                 priority_b_cnt = 0;
2071
2072                 if (rdev->family == CHIP_ARUBA)
2073                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2074                 else
2075                         dram_channels = si_get_number_of_dram_channels(rdev);
2076
2077                 /* watermark for high clocks */
2078                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2079                         wm_high.yclk =
2080                                 radeon_dpm_get_mclk(rdev, false) * 10;
2081                         wm_high.sclk =
2082                                 radeon_dpm_get_sclk(rdev, false) * 10;
2083                 } else {
2084                         wm_high.yclk = rdev->pm.current_mclk * 10;
2085                         wm_high.sclk = rdev->pm.current_sclk * 10;
2086                 }
2087
2088                 wm_high.disp_clk = mode->clock;
2089                 wm_high.src_width = mode->crtc_hdisplay;
2090                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2091                 wm_high.blank_time = line_time - wm_high.active_time;
2092                 wm_high.interlaced = false;
2093                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2094                         wm_high.interlaced = true;
2095                 wm_high.vsc = radeon_crtc->vsc;
2096                 wm_high.vtaps = 1;
2097                 if (radeon_crtc->rmx_type != RMX_OFF)
2098                         wm_high.vtaps = 2;
2099                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2100                 wm_high.lb_size = lb_size;
2101                 wm_high.dram_channels = dram_channels;
2102                 wm_high.num_heads = num_heads;
2103
2104                 /* watermark for low clocks */
2105                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2106                         wm_low.yclk =
2107                                 radeon_dpm_get_mclk(rdev, true) * 10;
2108                         wm_low.sclk =
2109                                 radeon_dpm_get_sclk(rdev, true) * 10;
2110                 } else {
2111                         wm_low.yclk = rdev->pm.current_mclk * 10;
2112                         wm_low.sclk = rdev->pm.current_sclk * 10;
2113                 }
2114
2115                 wm_low.disp_clk = mode->clock;
2116                 wm_low.src_width = mode->crtc_hdisplay;
2117                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2118                 wm_low.blank_time = line_time - wm_low.active_time;
2119                 wm_low.interlaced = false;
2120                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2121                         wm_low.interlaced = true;
2122                 wm_low.vsc = radeon_crtc->vsc;
2123                 wm_low.vtaps = 1;
2124                 if (radeon_crtc->rmx_type != RMX_OFF)
2125                         wm_low.vtaps = 2;
2126                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2127                 wm_low.lb_size = lb_size;
2128                 wm_low.dram_channels = dram_channels;
2129                 wm_low.num_heads = num_heads;
2130
2131                 /* set for high clocks */
2132                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2133                 /* set for low clocks */
2134                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2135
2136                 /* possibly force display priority to high */
2137                 /* should really do this at mode validation time... */
2138                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2139                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2140                     !dce6_check_latency_hiding(&wm_high) ||
2141                     (rdev->disp_priority == 2)) {
2142                         DRM_DEBUG_KMS("force priority to high\n");
2143                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2144                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2145                 }
2146                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2147                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2148                     !dce6_check_latency_hiding(&wm_low) ||
2149                     (rdev->disp_priority == 2)) {
2150                         DRM_DEBUG_KMS("force priority to high\n");
2151                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2152                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2153                 }
2154
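                /* priority mark: roughly the number of pixels scanned out
                 * during the watermark latency (latency_ns * pixel clock *
                 * hsc), expressed in units of 16 pixels
                 */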
2155                 a.full = dfixed_const(1000);
2156                 b.full = dfixed_const(mode->clock);
2157                 b.full = dfixed_div(b, a);
2158                 c.full = dfixed_const(latency_watermark_a);
2159                 c.full = dfixed_mul(c, b);
2160                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2161                 c.full = dfixed_div(c, a);
2162                 a.full = dfixed_const(16);
2163                 c.full = dfixed_div(c, a);
2164                 priority_a_mark = dfixed_trunc(c);
2165                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2166
2167                 a.full = dfixed_const(1000);
2168                 b.full = dfixed_const(mode->clock);
2169                 b.full = dfixed_div(b, a);
2170                 c.full = dfixed_const(latency_watermark_b);
2171                 c.full = dfixed_mul(c, b);
2172                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2173                 c.full = dfixed_div(c, a);
2174                 a.full = dfixed_const(16);
2175                 c.full = dfixed_div(c, a);
2176                 priority_b_mark = dfixed_trunc(c);
2177                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2178         }
2179
2180         /* select wm A */
2181         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2182         tmp = arb_control3;
2183         tmp &= ~LATENCY_WATERMARK_MASK(3);
2184         tmp |= LATENCY_WATERMARK_MASK(1);
2185         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2186         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2187                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2188                 LATENCY_HIGH_WATERMARK(line_time)));
2189         /* select wm B */
2190         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2191         tmp &= ~LATENCY_WATERMARK_MASK(3);
2192         tmp |= LATENCY_WATERMARK_MASK(2);
2193         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2194         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2195                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2196                 LATENCY_HIGH_WATERMARK(line_time)));
2197         /* restore original selection */
2198         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2199
2200         /* write the priority marks */
2201         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2202         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2203
2204         /* save values for DPM */
2205         radeon_crtc->line_time = line_time;
2206         radeon_crtc->wm_high = latency_watermark_a;
2207         radeon_crtc->wm_low = latency_watermark_b;
2208 }
2209
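/**
 * dce6_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculates and programs the display watermarks and line buffer
 * allocation for each pair of crtcs (DCE6).
 */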
2210 void dce6_bandwidth_update(struct radeon_device *rdev)
2211 {
2212         struct drm_display_mode *mode0 = NULL;
2213         struct drm_display_mode *mode1 = NULL;
2214         u32 num_heads = 0, lb_size;
2215         int i;
2216
2217         radeon_update_display_priority(rdev);
2218
2219         for (i = 0; i < rdev->num_crtc; i++) {
2220                 if (rdev->mode_info.crtcs[i]->base.enabled)
2221                         num_heads++;
2222         }
2223         for (i = 0; i < rdev->num_crtc; i += 2) {
2224                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2225                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2226                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2227                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2228                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2229                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2230         }
2231 }
2232
2233 /*
2234  * Core functions
2235  */
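/*
 * si_tiling_mode_table_init() programs GB_TILE_MODE0..31 with the per-asic
 * tiling configurations and caches each value in
 * rdev->config.si.tile_mode_array[] for later use by the driver.
 */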
2236 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2237 {
2238         const u32 num_tile_mode_states = 32;
2239         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2240
2241         switch (rdev->config.si.mem_row_size_in_kb) {
2242         case 1:
2243                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2244                 break;
2245         case 2:
2246         default:
2247                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2248                 break;
2249         case 4:
2250                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2251                 break;
2252         }
2253
2254         if ((rdev->family == CHIP_TAHITI) ||
2255             (rdev->family == CHIP_PITCAIRN)) {
2256                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2257                         switch (reg_offset) {
2258                         case 0:  /* non-AA compressed depth or any compressed stencil */
2259                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2260                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2261                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2262                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2264                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2267                                 break;
2268                         case 1:  /* 2xAA/4xAA compressed depth only */
2269                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2271                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2272                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2273                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2274                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2277                                 break;
2278                         case 2:  /* 8xAA compressed depth only */
2279                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2281                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2282                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2284                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2287                                 break;
2288                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2293                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2294                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297                                 break;
2298                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2299                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2300                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2303                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2304                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307                                 break;
2308                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2309                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312                                                  TILE_SPLIT(split_equal_to_row_size) |
2313                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2314                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317                                 break;
2318                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2319                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322                                                  TILE_SPLIT(split_equal_to_row_size) |
2323                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2324                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2326                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2327                                 break;
2328                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2329                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332                                                  TILE_SPLIT(split_equal_to_row_size) |
2333                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2334                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337                                 break;
2338                         case 8:  /* 1D and 1D Array Surfaces */
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2343                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2344                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2347                                 break;
2348                         case 9:  /* Displayable maps. */
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2353                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2354                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2357                                 break;
2358                         case 10:  /* Display 8bpp. */
2359                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2363                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2364                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367                                 break;
2368                         case 11:  /* Display 16bpp. */
2369                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2373                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2374                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377                                 break;
2378                         case 12:  /* Display 32bpp. */
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2383                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2384                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2387                                 break;
2388                         case 13:  /* Thin. */
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2391                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2393                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2394                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2396                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397                                 break;
2398                         case 14:  /* Thin 8 bpp. */
2399                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2404                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2406                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2407                                 break;
2408                         case 15:  /* Thin 16 bpp. */
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2414                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2417                                 break;
2418                         case 16:  /* Thin 32 bpp. */
2419                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2423                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2424                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2426                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2427                                 break;
2428                         case 17:  /* Thin 64 bpp. */
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432                                                  TILE_SPLIT(split_equal_to_row_size) |
2433                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2434                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437                                 break;
2438                         case 21:  /* 8 bpp PRT. */
2439                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2444                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2447                                 break;
2448                         case 22:  /* 16 bpp PRT */
2449                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2454                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2457                                 break;
2458                         case 23:  /* 32 bpp PRT */
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2464                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2467                                 break;
2468                         case 24:  /* 64 bpp PRT */
2469                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2473                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2474                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477                                 break;
2478                         case 25:  /* 128 bpp PRT */
2479                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2483                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2484                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2487                                 break;
2488                         default:
2489                                 gb_tile_moden = 0;
2490                                 break;
2491                         }
2492                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2493                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2494                 }
2495         } else if ((rdev->family == CHIP_VERDE) ||
2496                    (rdev->family == CHIP_OLAND) ||
2497                    (rdev->family == CHIP_HAINAN)) {
2498                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2499                         switch (reg_offset) {
2500                         case 0:  /* non-AA compressed depth or any compressed stencil */
2501                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2504                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2505                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2506                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2509                                 break;
2510                         case 1:  /* 2xAA/4xAA compressed depth only */
2511                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2513                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2514                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2516                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2519                                 break;
2520                         case 2:  /* 8xAA compressed depth only */
2521                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2524                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2525                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2526                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2529                                 break;
2530                         case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2531                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2536                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539                                 break;
2540                         case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2546                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549                                 break;
2550                         case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554                                                  TILE_SPLIT(split_equal_to_row_size) |
2555                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2556                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559                                 break;
2560                         case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2561                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564                                                  TILE_SPLIT(split_equal_to_row_size) |
2565                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2566                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569                                 break;
2570                         case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2571                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                                  MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574                                                  TILE_SPLIT(split_equal_to_row_size) |
2575                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2576                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2579                                 break;
2580                         case 8:  /* 1D and 1D Array Surfaces */
2581                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2582                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2585                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2586                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589                                 break;
2590                         case 9:  /* Displayable maps. */
2591                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2596                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599                                 break;
2600                         case 10:  /* Display 8bpp. */
2601                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2605                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2606                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2609                                 break;
2610                         case 11:  /* Display 16bpp. */
2611                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2616                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619                                 break;
2620                         case 12:  /* Display 32bpp. */
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2625                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2626                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629                                 break;
2630                         case 13:  /* Thin. */
2631                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2632                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2635                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2636                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2639                                 break;
2640                         case 14:  /* Thin 8 bpp. */
2641                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2646                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649                                 break;
2650                         case 15:  /* Thin 16 bpp. */
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2656                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659                                 break;
2660                         case 16:  /* Thin 32 bpp. */
2661                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2665                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2666                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669                                 break;
2670                         case 17:  /* Thin 64 bpp. */
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674                                                  TILE_SPLIT(split_equal_to_row_size) |
2675                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2676                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679                                 break;
2680                         case 21:  /* 8 bpp PRT. */
2681                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2685                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2686                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2687                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689                                 break;
2690                         case 22:  /* 16 bpp PRT */
2691                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2696                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2699                                 break;
2700                         case 23:  /* 32 bpp PRT */
2701                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2706                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709                                 break;
2710                         case 24:  /* 64 bpp PRT */
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2715                                                  NUM_BANKS(ADDR_SURF_16_BANK) |
2716                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2718                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719                                 break;
2720                         case 25:  /* 128 bpp PRT */
2721                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722                                                  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2725                                                  NUM_BANKS(ADDR_SURF_8_BANK) |
2726                                                  BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2729                                 break;
2730                         default:
2731                                 gb_tile_moden = 0;
2732                                 break;
2733                         }
2734                         rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2735                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2736                 }
2737         } else
2738                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2739 }
2740
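/**
 * si_select_se_sh - select which SE/SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address (0xffffffff = broadcast to all SEs)
 * @sh_num: shader array within the SE (0xffffffff = broadcast to all SHs)
 *
 * Programs GRBM_GFX_INDEX so that subsequent register accesses hit the
 * selected shader engine/shader array, or are broadcast to all of them.
 */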
2741 static void si_select_se_sh(struct radeon_device *rdev,
2742                             u32 se_num, u32 sh_num)
2743 {
2744         u32 data = INSTANCE_BROADCAST_WRITES;
2745
2746         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2747                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2748         else if (se_num == 0xffffffff)
2749                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2750         else if (sh_num == 0xffffffff)
2751                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2752         else
2753                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2754         WREG32(GRBM_GFX_INDEX, data);
2755 }
2756
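/* Return a mask with the lowest @bit_width bits set. */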
2757 static u32 si_create_bitmask(u32 bit_width)
2758 {
2759         u32 i, mask = 0;
2760
2761         for (i = 0; i < bit_width; i++) {
2762                 mask <<= 1;
2763                 mask |= 1;
2764         }
2765         return mask;
2766 }
2767
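/**
 * si_get_cu_enabled - get the bitmap of enabled CUs
 *
 * @rdev: radeon_device pointer
 * @cu_per_sh: number of CUs per shader array
 *
 * Combines the harvest (CC_GC_SHADER_ARRAY_CONFIG) and user
 * (GC_USER_SHADER_ARRAY_CONFIG) disable bits for the currently selected
 * shader array and returns a bitmap of the CUs that are enabled.
 */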
2768 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2769 {
2770         u32 data, mask;
2771
2772         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2773         if (data & 1)
2774                 data &= INACTIVE_CUS_MASK;
2775         else
2776                 data = 0;
2777         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2778
2779         data >>= INACTIVE_CUS_SHIFT;
2780
2781         mask = si_create_bitmask(cu_per_sh);
2782
2783         return ~data & mask;
2784 }
2785
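/**
 * si_setup_spi - program SPI static thread management
 *
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @cu_per_sh: number of CUs per shader array
 *
 * For each shader array, clears the SPI_STATIC_THREAD_MGMT_3 bit
 * corresponding to the first active CU found in that array.
 */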
2786 static void si_setup_spi(struct radeon_device *rdev,
2787                          u32 se_num, u32 sh_per_se,
2788                          u32 cu_per_sh)
2789 {
2790         int i, j, k;
2791         u32 data, mask, active_cu;
2792
2793         for (i = 0; i < se_num; i++) {
2794                 for (j = 0; j < sh_per_se; j++) {
2795                         si_select_se_sh(rdev, i, j);
2796                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2797                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2798
2799                         mask = 1;
2800                         for (k = 0; k < 16; k++) {
2801                                 mask <<= k;
2802                                 if (active_cu & mask) {
2803                                         data &= ~mask;
2804                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2805                                         break;
2806                                 }
2807                         }
2808                 }
2809         }
2810         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2811 }
2812
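/**
 * si_get_rb_disabled - get the bitmap of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: total number of render backends
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 *
 * Combines the harvest (CC_RB_BACKEND_DISABLE) and user
 * (GC_USER_RB_BACKEND_DISABLE) disable bits and returns a bitmap of the
 * disabled render backends for the currently selected SE/SH.
 */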
2813 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2814                               u32 max_rb_num, u32 se_num,
2815                               u32 sh_per_se)
2816 {
2817         u32 data, mask;
2818
2819         data = RREG32(CC_RB_BACKEND_DISABLE);
2820         if (data & 1)
2821                 data &= BACKEND_DISABLE_MASK;
2822         else
2823                 data = 0;
2824         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2825
2826         data >>= BACKEND_DISABLE_SHIFT;
2827
2828         mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2829
2830         return data & mask;
2831 }
2832
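/**
 * si_setup_rb - set up the render backends
 *
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num: total number of render backends
 *
 * Collects the disabled RB bitmap from all shader arrays and programs
 * PA_SC_RASTER_CONFIG on each shader engine to route rasterization to
 * the RBs that are actually enabled.
 */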
2833 static void si_setup_rb(struct radeon_device *rdev,
2834                         u32 se_num, u32 sh_per_se,
2835                         u32 max_rb_num)
2836 {
2837         int i, j;
2838         u32 data, mask;
2839         u32 disabled_rbs = 0;
2840         u32 enabled_rbs = 0;
2841
2842         for (i = 0; i < se_num; i++) {
2843                 for (j = 0; j < sh_per_se; j++) {
2844                         si_select_se_sh(rdev, i, j);
2845                         data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2846                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2847                 }
2848         }
2849         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2850
2851         mask = 1;
2852         for (i = 0; i < max_rb_num; i++) {
2853                 if (!(disabled_rbs & mask))
2854                         enabled_rbs |= mask;
2855                 mask <<= 1;
2856         }
2857
2858         for (i = 0; i < se_num; i++) {
2859                 si_select_se_sh(rdev, i, 0xffffffff);
2860                 data = 0;
2861                 for (j = 0; j < sh_per_se; j++) {
2862                         switch (enabled_rbs & 3) {
2863                         case 1:
2864                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2865                                 break;
2866                         case 2:
2867                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2868                                 break;
2869                         case 3:
2870                         default:
2871                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2872                                 break;
2873                         }
2874                         enabled_rbs >>= 2;
2875                 }
2876                 WREG32(PA_SC_RASTER_CONFIG, data);
2877         }
2878         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2879 }
2880
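/**
 * si_gpu_init - set up the gfx block for the asic
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-family configuration (shader engines, tile pipes,
 * memory row size, tiling tables, RB and SPI setup) and the hardware
 * defaults for the 3D engine.
 */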
2881 static void si_gpu_init(struct radeon_device *rdev)
2882 {
2883         u32 gb_addr_config = 0;
2884         u32 mc_shared_chmap, mc_arb_ramcfg;
2885         u32 sx_debug_1;
2886         u32 hdp_host_path_cntl;
2887         u32 tmp;
2888         int i, j;
2889
2890         switch (rdev->family) {
2891         case CHIP_TAHITI:
2892                 rdev->config.si.max_shader_engines = 2;
2893                 rdev->config.si.max_tile_pipes = 12;
2894                 rdev->config.si.max_cu_per_sh = 8;
2895                 rdev->config.si.max_sh_per_se = 2;
2896                 rdev->config.si.max_backends_per_se = 4;
2897                 rdev->config.si.max_texture_channel_caches = 12;
2898                 rdev->config.si.max_gprs = 256;
2899                 rdev->config.si.max_gs_threads = 32;
2900                 rdev->config.si.max_hw_contexts = 8;
2901
2902                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2903                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2904                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2905                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2906                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2907                 break;
2908         case CHIP_PITCAIRN:
2909                 rdev->config.si.max_shader_engines = 2;
2910                 rdev->config.si.max_tile_pipes = 8;
2911                 rdev->config.si.max_cu_per_sh = 5;
2912                 rdev->config.si.max_sh_per_se = 2;
2913                 rdev->config.si.max_backends_per_se = 4;
2914                 rdev->config.si.max_texture_channel_caches = 8;
2915                 rdev->config.si.max_gprs = 256;
2916                 rdev->config.si.max_gs_threads = 32;
2917                 rdev->config.si.max_hw_contexts = 8;
2918
2919                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2920                 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2921                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2922                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2923                 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2924                 break;
2925         case CHIP_VERDE:
2926         default:
2927                 rdev->config.si.max_shader_engines = 1;
2928                 rdev->config.si.max_tile_pipes = 4;
2929                 rdev->config.si.max_cu_per_sh = 5;
2930                 rdev->config.si.max_sh_per_se = 2;
2931                 rdev->config.si.max_backends_per_se = 4;
2932                 rdev->config.si.max_texture_channel_caches = 4;
2933                 rdev->config.si.max_gprs = 256;
2934                 rdev->config.si.max_gs_threads = 32;
2935                 rdev->config.si.max_hw_contexts = 8;
2936
2937                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2938                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2939                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2940                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2941                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2942                 break;
2943         case CHIP_OLAND:
2944                 rdev->config.si.max_shader_engines = 1;
2945                 rdev->config.si.max_tile_pipes = 4;
2946                 rdev->config.si.max_cu_per_sh = 6;
2947                 rdev->config.si.max_sh_per_se = 1;
2948                 rdev->config.si.max_backends_per_se = 2;
2949                 rdev->config.si.max_texture_channel_caches = 4;
2950                 rdev->config.si.max_gprs = 256;
2951                 rdev->config.si.max_gs_threads = 16;
2952                 rdev->config.si.max_hw_contexts = 8;
2953
2954                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2955                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2956                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2957                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2958                 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2959                 break;
2960         case CHIP_HAINAN:
2961                 rdev->config.si.max_shader_engines = 1;
2962                 rdev->config.si.max_tile_pipes = 4;
2963                 rdev->config.si.max_cu_per_sh = 5;
2964                 rdev->config.si.max_sh_per_se = 1;
2965                 rdev->config.si.max_backends_per_se = 1;
2966                 rdev->config.si.max_texture_channel_caches = 2;
2967                 rdev->config.si.max_gprs = 256;
2968                 rdev->config.si.max_gs_threads = 16;
2969                 rdev->config.si.max_hw_contexts = 8;
2970
2971                 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2972                 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2973                 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2974                 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2975                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2976                 break;
2977         }
2978
2979         /* Initialize HDP */
2980         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2981                 WREG32((0x2c14 + j), 0x00000000);
2982                 WREG32((0x2c18 + j), 0x00000000);
2983                 WREG32((0x2c1c + j), 0x00000000);
2984                 WREG32((0x2c20 + j), 0x00000000);
2985                 WREG32((0x2c24 + j), 0x00000000);
2986         }
2987
2988         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2989
2990         evergreen_fix_pci_max_read_req_size(rdev);
2991
2992         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2993
2994         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2995         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2996
2997         rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2998         rdev->config.si.mem_max_burst_length_bytes = 256;
2999         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3000         rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3001         if (rdev->config.si.mem_row_size_in_kb > 4)
3002                 rdev->config.si.mem_row_size_in_kb = 4;
3003         /* XXX use MC settings? */
3004         rdev->config.si.shader_engine_tile_size = 32;
3005         rdev->config.si.num_gpus = 1;
3006         rdev->config.si.multi_gpu_tile_size = 64;
3007
3008         /* fix up row size */
3009         gb_addr_config &= ~ROW_SIZE_MASK;
3010         switch (rdev->config.si.mem_row_size_in_kb) {
3011         case 1:
3012         default:
3013                 gb_addr_config |= ROW_SIZE(0);
3014                 break;
3015         case 2:
3016                 gb_addr_config |= ROW_SIZE(1);
3017                 break;
3018         case 4:
3019                 gb_addr_config |= ROW_SIZE(2);
3020                 break;
3021         }
3022
3023         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3024          * not have bank info, so create a custom tiling dword.
3025          * bits 3:0   num_pipes
3026          * bits 7:4   num_banks
3027          * bits 11:8  group_size
3028          * bits 15:12 row_size
3029          */
3030         rdev->config.si.tile_config = 0;
3031         switch (rdev->config.si.num_tile_pipes) {
3032         case 1:
3033                 rdev->config.si.tile_config |= (0 << 0);
3034                 break;
3035         case 2:
3036                 rdev->config.si.tile_config |= (1 << 0);
3037                 break;
3038         case 4:
3039                 rdev->config.si.tile_config |= (2 << 0);
3040                 break;
3041         case 8:
3042         default:
3043                 /* XXX what about 12? */
3044                 rdev->config.si.tile_config |= (3 << 0);
3045                 break;
3046         }
3047         switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3048         case 0: /* four banks */
3049                 rdev->config.si.tile_config |= 0 << 4;
3050                 break;
3051         case 1: /* eight banks */
3052                 rdev->config.si.tile_config |= 1 << 4;
3053                 break;
3054         case 2: /* sixteen banks */
3055         default:
3056                 rdev->config.si.tile_config |= 2 << 4;
3057                 break;
3058         }
3059         rdev->config.si.tile_config |=
3060                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3061         rdev->config.si.tile_config |=
3062                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3063
3064         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3065         WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3066         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3067         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3068         WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3069         WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3070         if (rdev->has_uvd) {
3071                 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3072                 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3073                 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3074         }
3075
3076         si_tiling_mode_table_init(rdev);
3077
3078         si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3079                     rdev->config.si.max_sh_per_se,
3080                     rdev->config.si.max_backends_per_se);
3081
3082         si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3083                      rdev->config.si.max_sh_per_se,
3084                      rdev->config.si.max_cu_per_sh);
3085
3086
3087         /* set HW defaults for 3D engine */
3088         WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3089                                      ROQ_IB2_START(0x2b)));
3090         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3091
3092         sx_debug_1 = RREG32(SX_DEBUG_1);
3093         WREG32(SX_DEBUG_1, sx_debug_1);
3094
3095         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3096
3097         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3098                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3099                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3100                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3101
3102         WREG32(VGT_NUM_INSTANCES, 1);
3103
3104         WREG32(CP_PERFMON_CNTL, 0);
3105
3106         WREG32(SQ_CONFIG, 0);
3107
3108         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3109                                           FORCE_EOV_MAX_REZ_CNT(255)));
3110
3111         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3112                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3113
3114         WREG32(VGT_GS_VERTEX_REUSE, 16);
3115         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3116
3117         WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3118         WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3119         WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3120         WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3121         WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3122         WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3123         WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3124         WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3125
3126         tmp = RREG32(HDP_MISC_CNTL);
3127         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3128         WREG32(HDP_MISC_CNTL, tmp);
3129
3130         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3131         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3132
3133         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3134
3135         udelay(50);
3136 }
3137
3138 /*
3139  * GPU scratch registers helpers function.
3140  * GPU scratch registers helper functions.
3141 static void si_scratch_init(struct radeon_device *rdev)
3142 {
3143         int i;
3144
3145         rdev->scratch.num_reg = 7;
3146         rdev->scratch.reg_base = SCRATCH_REG0;
3147         for (i = 0; i < rdev->scratch.num_reg; i++) {
3148                 rdev->scratch.free[i] = true;
3149                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3150         }
3151 }
3152
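/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the GPU caches and emits an EVENT_WRITE_EOP packet that
 * writes the fence sequence number and raises an interrupt.
 */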
3153 void si_fence_ring_emit(struct radeon_device *rdev,
3154                         struct radeon_fence *fence)
3155 {
3156         struct radeon_ring *ring = &rdev->ring[fence->ring];
3157         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3158
3159         /* flush read cache over gart */
3160         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3161         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3162         radeon_ring_write(ring, 0);
3163         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3164         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3165                           PACKET3_TC_ACTION_ENA |
3166                           PACKET3_SH_KCACHE_ACTION_ENA |
3167                           PACKET3_SH_ICACHE_ACTION_ENA);
3168         radeon_ring_write(ring, 0xFFFFFFFF);
3169         radeon_ring_write(ring, 0);
3170         radeon_ring_write(ring, 10); /* poll interval */
3171         /* EVENT_WRITE_EOP - flush caches, send int */
3172         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3173         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3174         radeon_ring_write(ring, addr & 0xffffffff);
3175         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3176         radeon_ring_write(ring, fence->seq);
3177         radeon_ring_write(ring, 0);
3178 }
3179
3180 /*
3181  * Indirect buffer (IB) handling.
3182  */
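/**
 * si_ring_ib_execute - schedule an IB on a gfx/compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet referencing the IB and, for normal IBs, flushes the read
 * caches for the IB's VM id.
 */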
3183 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3184 {
3185         struct radeon_ring *ring = &rdev->ring[ib->ring];
3186         u32 header;
3187
3188         if (ib->is_const_ib) {
3189                 /* set switch buffer packet before const IB */
3190                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3191                 radeon_ring_write(ring, 0);
3192
3193                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3194         } else {
3195                 u32 next_rptr;
3196                 if (ring->rptr_save_reg) {
3197                         next_rptr = ring->wptr + 3 + 4 + 8;
3198                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3199                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3200                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3201                         radeon_ring_write(ring, next_rptr);
3202                 } else if (rdev->wb.enabled) {
3203                         next_rptr = ring->wptr + 5 + 4 + 8;
3204                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3205                         radeon_ring_write(ring, (1 << 8));
3206                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3207                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3208                         radeon_ring_write(ring, next_rptr);
3209                 }
3210
3211                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3212         }
3213
3214         radeon_ring_write(ring, header);
3215         radeon_ring_write(ring,
3216 #ifdef __BIG_ENDIAN
3217                           (2 << 0) |
3218 #endif
3219                           (ib->gpu_addr & 0xFFFFFFFC));
3220         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3221         radeon_ring_write(ring, ib->length_dw |
3222                           (ib->vm ? (ib->vm->id << 24) : 0));
3223
3224         if (!ib->is_const_ib) {
3225                 /* flush read cache over gart for this vmid */
3226                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3227                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3228                 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3229                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3230                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3231                                   PACKET3_TC_ACTION_ENA |
3232                                   PACKET3_SH_KCACHE_ACTION_ENA |
3233                                   PACKET3_SH_ICACHE_ACTION_ENA);
3234                 radeon_ring_write(ring, 0xFFFFFFFF);
3235                 radeon_ring_write(ring, 0);
3236                 radeon_ring_write(ring, 10); /* poll interval */
3237         }
3238 }
3239
3240 /*
3241  * CP.
3242  */
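/* Enable or halt the CP (PFP/CE/ME); when halting, mark the gfx and
 * compute rings as not ready.
 */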
3243 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3244 {
3245         if (enable)
3246                 WREG32(CP_ME_CNTL, 0);
3247         else {
3248                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3249                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3250                 WREG32(SCRATCH_UMSK, 0);
3251                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3252                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3253                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3254         }
3255         udelay(50);
3256 }
3257
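/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and writes the PFP, CE and ME firmware images into the
 * CP ucode RAM. Returns -EINVAL if the required firmware has not been
 * loaded.
 */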
3258 static int si_cp_load_microcode(struct radeon_device *rdev)
3259 {
3260         const __be32 *fw_data;
3261         int i;
3262
3263         if (!rdev->me_fw || !rdev->pfp_fw)
3264                 return -EINVAL;
3265
3266         si_cp_enable(rdev, false);
3267
3268         /* PFP */
3269         fw_data = (const __be32 *)rdev->pfp_fw->data;
3270         WREG32(CP_PFP_UCODE_ADDR, 0);
3271         for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3272                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3273         WREG32(CP_PFP_UCODE_ADDR, 0);
3274
3275         /* CE */
3276         fw_data = (const __be32 *)rdev->ce_fw->data;
3277         WREG32(CP_CE_UCODE_ADDR, 0);
3278         for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3279                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3280         WREG32(CP_CE_UCODE_ADDR, 0);
3281
3282         /* ME */
3283         fw_data = (const __be32 *)rdev->me_fw->data;
3284         WREG32(CP_ME_RAM_WADDR, 0);
3285         for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3286                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3287         WREG32(CP_ME_RAM_WADDR, 0);
3288
3289         WREG32(CP_PFP_UCODE_ADDR, 0);
3290         WREG32(CP_CE_UCODE_ADDR, 0);
3291         WREG32(CP_ME_RAM_WADDR, 0);
3292         WREG32(CP_ME_RAM_RADDR, 0);
3293         return 0;
3294 }
3295
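/**
 * si_cp_start - initialize the CP
 *
 * @rdev: radeon_device pointer
 *
 * Emits the ME initialization and CE partition setup, loads the clear
 * state, and clears the compute context state on each CP ring.
 * Returns 0 on success, error on failure.
 */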
3296 static int si_cp_start(struct radeon_device *rdev)
3297 {
3298         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3299         int r, i;
3300
3301         r = radeon_ring_lock(rdev, ring, 7 + 4);
3302         if (r) {
3303                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3304                 return r;
3305         }
3306         /* init the CP */
3307         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3308         radeon_ring_write(ring, 0x1);
3309         radeon_ring_write(ring, 0x0);
3310         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3311         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3312         radeon_ring_write(ring, 0);
3313         radeon_ring_write(ring, 0);
3314
3315         /* init the CE partitions */
3316         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3317         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3318         radeon_ring_write(ring, 0xc000);
3319         radeon_ring_write(ring, 0xe000);
3320         radeon_ring_unlock_commit(rdev, ring);
3321
3322         si_cp_enable(rdev, true);
3323
3324         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3325         if (r) {
3326                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3327                 return r;
3328         }
3329
3330         /* setup clear context state */
3331         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3332         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3333
3334         for (i = 0; i < si_default_size; i++)
3335                 radeon_ring_write(ring, si_default_state[i]);
3336
3337         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3338         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3339
3340         /* set clear context state */
3341         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3342         radeon_ring_write(ring, 0);
3343
3344         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3345         radeon_ring_write(ring, 0x00000316);
3346         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3347         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3348
3349         radeon_ring_unlock_commit(rdev, ring);
3350
3351         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3352                 ring = &rdev->ring[i];
3353                 r = radeon_ring_lock(rdev, ring, 2);
                if (r) {
                        DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                        return r;
                }
3354
3355                 /* clear the compute context state */
3356                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3357                 radeon_ring_write(ring, 0);
3358
3359                 radeon_ring_unlock_commit(rdev, ring);
3360         }
3361
3362         return 0;
3363 }
3364
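/* Halt the CP and tear down the gfx ring and both compute rings. */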
3365 static void si_cp_fini(struct radeon_device *rdev)
3366 {
3367         struct radeon_ring *ring;
3368         si_cp_enable(rdev, false);
3369
3370         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3371         radeon_ring_fini(rdev, ring);
3372         radeon_scratch_free(rdev, ring->rptr_save_reg);
3373
3374         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3375         radeon_ring_fini(rdev, ring);
3376         radeon_scratch_free(rdev, ring->rptr_save_reg);
3377
3378         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3379         radeon_ring_fini(rdev, ring);
3380         radeon_scratch_free(rdev, ring->rptr_save_reg);
3381 }
3382
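/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the ring buffer registers (size, base, rptr/wptr, writeback
 * address) for the gfx ring and the two compute rings, starts the CP
 * and ring-tests each ring. Returns 0 on success, error on failure.
 */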
3383 static int si_cp_resume(struct radeon_device *rdev)
3384 {
3385         struct radeon_ring *ring;
3386         u32 tmp;
3387         u32 rb_bufsz;
3388         int r;
3389
3390         si_enable_gui_idle_interrupt(rdev, false);
3391
3392         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3393         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3394
3395         /* Set the write pointer delay */
3396         WREG32(CP_RB_WPTR_DELAY, 0);
3397
3398         WREG32(CP_DEBUG, 0);
3399         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3400
3401         /* ring 0 - compute and gfx */
3402         /* Set ring buffer size */
3403         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3404         rb_bufsz = order_base_2(ring->ring_size / 8);
3405         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3406 #ifdef __BIG_ENDIAN
3407         tmp |= BUF_SWAP_32BIT;
3408 #endif
3409         WREG32(CP_RB0_CNTL, tmp);
3410
3411         /* Initialize the ring buffer's read and write pointers */
3412         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3413         ring->wptr = 0;
3414         WREG32(CP_RB0_WPTR, ring->wptr);
3415
3416         /* set the wb address whether it's enabled or not */
3417         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3418         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3419
3420         if (rdev->wb.enabled)
3421                 WREG32(SCRATCH_UMSK, 0xff);
3422         else {
3423                 tmp |= RB_NO_UPDATE;
3424                 WREG32(SCRATCH_UMSK, 0);
3425         }
3426
3427         mdelay(1);
3428         WREG32(CP_RB0_CNTL, tmp);
3429
3430         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3431
3432         ring->rptr = RREG32(CP_RB0_RPTR);
3433
3434         /* ring1  - compute only */
3435         /* Set ring buffer size */
3436         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3437         rb_bufsz = order_base_2(ring->ring_size / 8);
3438         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3439 #ifdef __BIG_ENDIAN
3440         tmp |= BUF_SWAP_32BIT;
3441 #endif
3442         WREG32(CP_RB1_CNTL, tmp);
3443
3444         /* Initialize the ring buffer's read and write pointers */
3445         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3446         ring->wptr = 0;
3447         WREG32(CP_RB1_WPTR, ring->wptr);
3448
3449         /* set the wb address whether it's enabled or not */
3450         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3451         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3452
3453         mdelay(1);
3454         WREG32(CP_RB1_CNTL, tmp);
3455
3456         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3457
3458         ring->rptr = RREG32(CP_RB1_RPTR);
3459
3460         /* ring2 - compute only */
3461         /* Set ring buffer size */
3462         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3463         rb_bufsz = order_base_2(ring->ring_size / 8);
3464         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3465 #ifdef __BIG_ENDIAN
3466         tmp |= BUF_SWAP_32BIT;
3467 #endif
3468         WREG32(CP_RB2_CNTL, tmp);
3469
3470         /* Initialize the ring buffer's read and write pointers */
3471         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3472         ring->wptr = 0;
3473         WREG32(CP_RB2_WPTR, ring->wptr);
3474
3475         /* set the wb address whether it's enabled or not */
3476         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3477         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3478
3479         mdelay(1);
3480         WREG32(CP_RB2_CNTL, tmp);
3481
3482         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3483
3484         ring->rptr = RREG32(CP_RB2_RPTR);
3485
3486         /* start the rings */
3487         si_cp_start(rdev);
3488         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3489         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3490         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3491         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3492         if (r) {
3493                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3494                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3495                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3496                 return r;
3497         }
3498         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3499         if (r) {
3500                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3501         }
3502         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3503         if (r) {
3504                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3505         }
3506
3507         si_enable_gui_idle_interrupt(rdev, true);
3508
3509         return 0;
3510 }
3511
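/**
 * si_gpu_check_soft_reset - check which GPU blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Checks the GRBM, SRBM, DMA and VM status registers and returns a
 * mask of the blocks that need to be reset.
 */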
3512 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3513 {
3514         u32 reset_mask = 0;
3515         u32 tmp;
3516
3517         /* GRBM_STATUS */
3518         tmp = RREG32(GRBM_STATUS);
3519         if (tmp & (PA_BUSY | SC_BUSY |
3520                    BCI_BUSY | SX_BUSY |
3521                    TA_BUSY | VGT_BUSY |
3522                    DB_BUSY | CB_BUSY |
3523                    GDS_BUSY | SPI_BUSY |
3524                    IA_BUSY | IA_BUSY_NO_DMA))
3525                 reset_mask |= RADEON_RESET_GFX;
3526
3527         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3528                    CP_BUSY | CP_COHERENCY_BUSY))
3529                 reset_mask |= RADEON_RESET_CP;
3530
3531         if (tmp & GRBM_EE_BUSY)
3532                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3533
3534         /* GRBM_STATUS2 */
3535         tmp = RREG32(GRBM_STATUS2);
3536         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3537                 reset_mask |= RADEON_RESET_RLC;
3538
3539         /* DMA_STATUS_REG 0 */
3540         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3541         if (!(tmp & DMA_IDLE))
3542                 reset_mask |= RADEON_RESET_DMA;
3543
3544         /* DMA_STATUS_REG 1 */
3545         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3546         if (!(tmp & DMA_IDLE))
3547                 reset_mask |= RADEON_RESET_DMA1;
3548
3549         /* SRBM_STATUS2 */
3550         tmp = RREG32(SRBM_STATUS2);
3551         if (tmp & DMA_BUSY)
3552                 reset_mask |= RADEON_RESET_DMA;
3553
3554         if (tmp & DMA1_BUSY)
3555                 reset_mask |= RADEON_RESET_DMA1;
3556
3557         /* SRBM_STATUS */
3558         tmp = RREG32(SRBM_STATUS);
3559
3560         if (tmp & IH_BUSY)
3561                 reset_mask |= RADEON_RESET_IH;
3562
3563         if (tmp & SEM_BUSY)
3564                 reset_mask |= RADEON_RESET_SEM;
3565
3566         if (tmp & GRBM_RQ_PENDING)
3567                 reset_mask |= RADEON_RESET_GRBM;
3568
3569         if (tmp & VMC_BUSY)
3570                 reset_mask |= RADEON_RESET_VMC;
3571
3572         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3573                    MCC_BUSY | MCD_BUSY))
3574                 reset_mask |= RADEON_RESET_MC;
3575
3576         if (evergreen_is_display_hung(rdev))
3577                 reset_mask |= RADEON_RESET_DISPLAY;
3578
3579         /* VM_L2_STATUS */
3580         tmp = RREG32(VM_L2_STATUS);
3581         if (tmp & L2_BUSY)
3582                 reset_mask |= RADEON_RESET_VMC;
3583
3584         /* Skip MC reset as it's most likely not hung, just busy */
3585         if (reset_mask & RADEON_RESET_MC) {
3586                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3587                 reset_mask &= ~RADEON_RESET_MC;
3588         }
3589
3590         return reset_mask;
3591 }
3592
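     /**
      * si_gpu_soft_reset - soft reset the requested GPU blocks
      *
      * @rdev: radeon_device pointer
      * @reset_mask: mask of RADEON_RESET_* flags selecting the blocks to reset
      *
      * Halt the CP, RLC and DMA engines, stop the MC, pulse the GRBM/SRBM
      * soft reset bits for the requested blocks and restore the MC (SI).
      */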
3593 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3594 {
3595         struct evergreen_mc_save save;
3596         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3597         u32 tmp;
3598
3599         if (reset_mask == 0)
3600                 return;
3601
3602         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3603
3604         evergreen_print_gpu_status_regs(rdev);
3605         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3606                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3607         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3608                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3609
3610         /* disable PG/CG */
3611         si_fini_pg(rdev);
3612         si_fini_cg(rdev);
3613
3614         /* stop the rlc */
3615         si_rlc_stop(rdev);
3616
3617         /* Disable CP parsing/prefetching */
3618         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3619
3620         if (reset_mask & RADEON_RESET_DMA) {
3621                 /* dma0 */
3622                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3623                 tmp &= ~DMA_RB_ENABLE;
3624                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3625         }
3626         if (reset_mask & RADEON_RESET_DMA1) {
3627                 /* dma1 */
3628                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3629                 tmp &= ~DMA_RB_ENABLE;
3630                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3631         }
3632
3633         udelay(50);
3634
3635         evergreen_mc_stop(rdev, &save);
3636         if (evergreen_mc_wait_for_idle(rdev)) {
3637                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3638         }
3639
3640         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3641                 grbm_soft_reset = SOFT_RESET_CB |
3642                         SOFT_RESET_DB |
3643                         SOFT_RESET_GDS |
3644                         SOFT_RESET_PA |
3645                         SOFT_RESET_SC |
3646                         SOFT_RESET_BCI |
3647                         SOFT_RESET_SPI |
3648                         SOFT_RESET_SX |
3649                         SOFT_RESET_TC |
3650                         SOFT_RESET_TA |
3651                         SOFT_RESET_VGT |
3652                         SOFT_RESET_IA;
3653         }
3654
3655         if (reset_mask & RADEON_RESET_CP) {
3656                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3657
3658                 srbm_soft_reset |= SOFT_RESET_GRBM;
3659         }
3660
3661         if (reset_mask & RADEON_RESET_DMA)
3662                 srbm_soft_reset |= SOFT_RESET_DMA;
3663
3664         if (reset_mask & RADEON_RESET_DMA1)
3665                 srbm_soft_reset |= SOFT_RESET_DMA1;
3666
3667         if (reset_mask & RADEON_RESET_DISPLAY)
3668                 srbm_soft_reset |= SOFT_RESET_DC;
3669
3670         if (reset_mask & RADEON_RESET_RLC)
3671                 grbm_soft_reset |= SOFT_RESET_RLC;
3672
3673         if (reset_mask & RADEON_RESET_SEM)
3674                 srbm_soft_reset |= SOFT_RESET_SEM;
3675
3676         if (reset_mask & RADEON_RESET_IH)
3677                 srbm_soft_reset |= SOFT_RESET_IH;
3678
3679         if (reset_mask & RADEON_RESET_GRBM)
3680                 srbm_soft_reset |= SOFT_RESET_GRBM;
3681
3682         if (reset_mask & RADEON_RESET_VMC)
3683                 srbm_soft_reset |= SOFT_RESET_VMC;
3684
3685         if (reset_mask & RADEON_RESET_MC)
3686                 srbm_soft_reset |= SOFT_RESET_MC;
3687
3688         if (grbm_soft_reset) {
3689                 tmp = RREG32(GRBM_SOFT_RESET);
3690                 tmp |= grbm_soft_reset;
3691                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3692                 WREG32(GRBM_SOFT_RESET, tmp);
3693                 tmp = RREG32(GRBM_SOFT_RESET);
3694
3695                 udelay(50);
3696
3697                 tmp &= ~grbm_soft_reset;
3698                 WREG32(GRBM_SOFT_RESET, tmp);
3699                 tmp = RREG32(GRBM_SOFT_RESET);
3700         }
3701
3702         if (srbm_soft_reset) {
3703                 tmp = RREG32(SRBM_SOFT_RESET);
3704                 tmp |= srbm_soft_reset;
3705                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3706                 WREG32(SRBM_SOFT_RESET, tmp);
3707                 tmp = RREG32(SRBM_SOFT_RESET);
3708
3709                 udelay(50);
3710
3711                 tmp &= ~srbm_soft_reset;
3712                 WREG32(SRBM_SOFT_RESET, tmp);
3713                 tmp = RREG32(SRBM_SOFT_RESET);
3714         }
3715
3716         /* Wait a little for things to settle down */
3717         udelay(50);
3718
3719         evergreen_mc_resume(rdev, &save);
3720         udelay(50);
3721
3722         evergreen_print_gpu_status_regs(rdev);
3723 }
3724
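     /**
      * si_asic_reset - soft reset the GPU
      *
      * @rdev: radeon_device pointer
      *
      * Check which blocks are hung, mark the engine as hung in the BIOS
      * scratch registers, soft reset the hung blocks and check again (SI).
      * Returns 0.
      */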
3725 int si_asic_reset(struct radeon_device *rdev)
3726 {
3727         u32 reset_mask;
3728
3729         reset_mask = si_gpu_check_soft_reset(rdev);
3730
3731         if (reset_mask)
3732                 r600_set_bios_scratch_engine_hung(rdev, true);
3733
3734         si_gpu_soft_reset(rdev, reset_mask);
3735
3736         reset_mask = si_gpu_check_soft_reset(rdev);
3737
3738         if (!reset_mask)
3739                 r600_set_bios_scratch_engine_hung(rdev, false);
3740
3741         return 0;
3742 }
3743
3744 /**
3745  * si_gfx_is_lockup - Check if the GFX engine is locked up
3746  *
3747  * @rdev: radeon_device pointer
3748  * @ring: radeon_ring structure holding ring information
3749  *
3750  * Check if the GFX engine is locked up.
3751  * Returns true if the engine appears to be locked up, false if not.
3752  */
3753 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3754 {
3755         u32 reset_mask = si_gpu_check_soft_reset(rdev);
3756
3757         if (!(reset_mask & (RADEON_RESET_GFX |
3758                             RADEON_RESET_COMPUTE |
3759                             RADEON_RESET_CP))) {
3760                 radeon_ring_lockup_update(ring);
3761                 return false;
3762         }
3763         /* force CP activities */
3764         radeon_ring_force_activity(rdev, ring);
3765         return radeon_ring_test_lockup(rdev, ring);
3766 }
3767
3768 /* MC */
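     /**
      * si_mc_program - program the memory controller
      *
      * @rdev: radeon_device pointer
      *
      * Set up the system aperture, FB location and HDP registers while the
      * MC and display controllers are stopped, then resume them (SI).
      */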
3769 static void si_mc_program(struct radeon_device *rdev)
3770 {
3771         struct evergreen_mc_save save;
3772         u32 tmp;
3773         int i, j;
3774
3775         /* Initialize HDP */
3776         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3777                 WREG32((0x2c14 + j), 0x00000000);
3778                 WREG32((0x2c18 + j), 0x00000000);
3779                 WREG32((0x2c1c + j), 0x00000000);
3780                 WREG32((0x2c20 + j), 0x00000000);
3781                 WREG32((0x2c24 + j), 0x00000000);
3782         }
3783         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3784
3785         evergreen_mc_stop(rdev, &save);
3786         if (radeon_mc_wait_for_idle(rdev)) {
3787                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3788         }
3789         if (!ASIC_IS_NODCE(rdev))
3790                 /* Lock out access through the VGA aperture */
3791                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3792         /* Update configuration */
3793         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3794                rdev->mc.vram_start >> 12);
3795         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3796                rdev->mc.vram_end >> 12);
3797         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3798                rdev->vram_scratch.gpu_addr >> 12);
3799         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3800         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3801         WREG32(MC_VM_FB_LOCATION, tmp);
3802         /* XXX double check these! */
3803         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3804         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3805         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3806         WREG32(MC_VM_AGP_BASE, 0);
3807         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3808         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3809         if (radeon_mc_wait_for_idle(rdev)) {
3810                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3811         }
3812         evergreen_mc_resume(rdev, &save);
3813         if (!ASIC_IS_NODCE(rdev)) {
3814                 /* we need to own VRAM, so turn off the VGA renderer here
3815                  * to stop it overwriting our objects */
3816                 rv515_vga_render_disable(rdev);
3817         }
3818 }
3819
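     /**
      * si_vram_gtt_location - place VRAM and GTT in the GPU address space
      *
      * @rdev: radeon_device pointer
      * @mc: memory controller structure holding the VRAM and GTT sizes
      *
      * Clamp the usable VRAM size if needed to leave room for the GTT and
      * pick base addresses for the VRAM and GTT apertures (SI).
      */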
3820 void si_vram_gtt_location(struct radeon_device *rdev,
3821                           struct radeon_mc *mc)
3822 {
3823         if (mc->mc_vram_size > 0xFFC0000000ULL) {
3824                 /* leave room for at least 1024M GTT */
3825                 dev_warn(rdev->dev, "limiting VRAM\n");
3826                 mc->real_vram_size = 0xFFC0000000ULL;
3827                 mc->mc_vram_size = 0xFFC0000000ULL;
3828         }
3829         radeon_vram_location(rdev, &rdev->mc, 0);
3830         rdev->mc.gtt_base_align = 0;
3831         radeon_gtt_location(rdev, mc);
3832 }
3833
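     /**
      * si_mc_init - initialize the memory controller info
      *
      * @rdev: radeon_device pointer
      *
      * Determine the memory channel size and count, the PCI aperture and
      * the amount of VRAM, then set up the VRAM/GTT layout (SI).
      * Returns 0 for success.
      */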
3834 static int si_mc_init(struct radeon_device *rdev)
3835 {
3836         u32 tmp;
3837         int chansize, numchan;
3838
3839         /* Get VRAM information */
3840         rdev->mc.vram_is_ddr = true;
3841         tmp = RREG32(MC_ARB_RAMCFG);
3842         if (tmp & CHANSIZE_OVERRIDE) {
3843                 chansize = 16;
3844         } else if (tmp & CHANSIZE_MASK) {
3845                 chansize = 64;
3846         } else {
3847                 chansize = 32;
3848         }
3849         tmp = RREG32(MC_SHARED_CHMAP);
3850         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3851         case 0:
3852         default:
3853                 numchan = 1;
3854                 break;
3855         case 1:
3856                 numchan = 2;
3857                 break;
3858         case 2:
3859                 numchan = 4;
3860                 break;
3861         case 3:
3862                 numchan = 8;
3863                 break;
3864         case 4:
3865                 numchan = 3;
3866                 break;
3867         case 5:
3868                 numchan = 6;
3869                 break;
3870         case 6:
3871                 numchan = 10;
3872                 break;
3873         case 7:
3874                 numchan = 12;
3875                 break;
3876         case 8:
3877                 numchan = 16;
3878                 break;
3879         }
3880         rdev->mc.vram_width = numchan * chansize;
3881         /* Could the aperture size report 0? */
3882         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3883         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3884         /* size in MB on si */
3885         tmp = RREG32(CONFIG_MEMSIZE);
3886         /* some boards may have garbage in the upper 16 bits */
3887         if (tmp & 0xffff0000) {
3888                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
3889                 if (tmp & 0xffff)
3890                         tmp &= 0xffff;
3891         }
3892         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
3893         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
3894         rdev->mc.visible_vram_size = rdev->mc.aper_size;
3895         si_vram_gtt_location(rdev, &rdev->mc);
3896         radeon_update_bandwidth_info(rdev);
3897
3898         return 0;
3899 }
3900
3901 /*
3902  * GART
3903  */
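     /**
      * si_pcie_gart_tlb_flush - flush the GART TLB
      *
      * @rdev: radeon_device pointer
      *
      * Flush the HDP cache and invalidate the TLB for VM context 0 (SI).
      */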
3904 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3905 {
3906         /* flush hdp cache */
3907         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3908
3909         /* bits 0-15 are the VM contexts 0-15 */
3910         WREG32(VM_INVALIDATE_REQUEST, 1);
3911 }
3912
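     /**
      * si_pcie_gart_enable - set up and enable the PCIE GART
      *
      * @rdev: radeon_device pointer
      *
      * Pin the GART page table in VRAM, program the TLB and L2 cache
      * controls, point VM context 0 at the GTT, set up contexts 1-15 for
      * per-process VMs and flush the TLBs (SI).
      * Returns 0 for success, error for failure.
      */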
3913 static int si_pcie_gart_enable(struct radeon_device *rdev)
3914 {
3915         int r, i;
3916
3917         if (rdev->gart.robj == NULL) {
3918                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3919                 return -EINVAL;
3920         }
3921         r = radeon_gart_table_vram_pin(rdev);
3922         if (r)
3923                 return r;
3924         radeon_gart_restore(rdev);
3925         /* Setup TLB control */
3926         WREG32(MC_VM_MX_L1_TLB_CNTL,
3927                (0xA << 7) |
3928                ENABLE_L1_TLB |
3929                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3930                ENABLE_ADVANCED_DRIVER_MODEL |
3931                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3932         /* Setup L2 cache */
3933         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3934                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3935                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3936                EFFECTIVE_L2_QUEUE_SIZE(7) |
3937                CONTEXT1_IDENTITY_ACCESS_MODE(1));
3938         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3939         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3940                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3941         /* setup context0 */
3942         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3943         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3944         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3945         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3946                         (u32)(rdev->dummy_page.addr >> 12));
3947         WREG32(VM_CONTEXT0_CNTL2, 0);
3948         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3949                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3950
3951         WREG32(0x15D4, 0);
3952         WREG32(0x15D8, 0);
3953         WREG32(0x15DC, 0);
3954
3955         /* empty context1-15 */
3956         /* set vm size, must be a multiple of 4 */
3957         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3958         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3959         /* Assign the pt base to something valid for now; the pts used for
3960          * the VMs are determined by the application and set up and assigned
3961          * on the fly in the vm part of radeon_gart.c
3962          */
3963         for (i = 1; i < 16; i++) {
3964                 if (i < 8)
3965                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3966                                rdev->gart.table_addr >> 12);
3967                 else
3968                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3969                                rdev->gart.table_addr >> 12);
3970         }
3971
3972         /* enable context1-15 */
3973         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3974                (u32)(rdev->dummy_page.addr >> 12));
3975         WREG32(VM_CONTEXT1_CNTL2, 4);
3976         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3977                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3978                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3979                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3980                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3981                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3982                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3983                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3984                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3985                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3986                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3987                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3988                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3989
3990         si_pcie_gart_tlb_flush(rdev);
3991         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3992                  (unsigned)(rdev->mc.gtt_size >> 20),
3993                  (unsigned long long)rdev->gart.table_addr);
3994         rdev->gart.ready = true;
3995         return 0;
3996 }
3997
3998 static void si_pcie_gart_disable(struct radeon_device *rdev)
3999 {
4000         /* Disable all tables */
4001         WREG32(VM_CONTEXT0_CNTL, 0);
4002         WREG32(VM_CONTEXT1_CNTL, 0);
4003         /* Setup TLB control */
4004         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4005                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4006         /* Setup L2 cache */
4007         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4008                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4009                EFFECTIVE_L2_QUEUE_SIZE(7) |
4010                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4011         WREG32(VM_L2_CNTL2, 0);
4012         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4013                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4014         radeon_gart_table_vram_unpin(rdev);
4015 }
4016
4017 static void si_pcie_gart_fini(struct radeon_device *rdev)
4018 {
4019         si_pcie_gart_disable(rdev);
4020         radeon_gart_table_vram_free(rdev);
4021         radeon_gart_fini(rdev);
4022 }
4023
4024 /* vm parser */
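     /**
      * si_vm_reg_valid - check if a register may be written from a VM client
      *
      * @reg: register offset
      *
      * Context registers (0x28000 and above) are always allowed; only a
      * small whitelist of config registers may be written from a VM
      * command stream (SI).
      * Returns true if the register is allowed, false otherwise.
      */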
4025 static bool si_vm_reg_valid(u32 reg)
4026 {
4027         /* context regs are fine */
4028         if (reg >= 0x28000)
4029                 return true;
4030
4031         /* check config regs */
4032         switch (reg) {
4033         case GRBM_GFX_INDEX:
4034         case CP_STRMOUT_CNTL:
4035         case VGT_VTX_VECT_EJECT_REG:
4036         case VGT_CACHE_INVALIDATION:
4037         case VGT_ESGS_RING_SIZE:
4038         case VGT_GSVS_RING_SIZE:
4039         case VGT_GS_VERTEX_REUSE:
4040         case VGT_PRIMITIVE_TYPE:
4041         case VGT_INDEX_TYPE:
4042         case VGT_NUM_INDICES:
4043         case VGT_NUM_INSTANCES:
4044         case VGT_TF_RING_SIZE:
4045         case VGT_HS_OFFCHIP_PARAM:
4046         case VGT_TF_MEMORY_BASE:
4047         case PA_CL_ENHANCE:
4048         case PA_SU_LINE_STIPPLE_VALUE:
4049         case PA_SC_LINE_STIPPLE_STATE:
4050         case PA_SC_ENHANCE:
4051         case SQC_CACHES:
4052         case SPI_STATIC_THREAD_MGMT_1:
4053         case SPI_STATIC_THREAD_MGMT_2:
4054         case SPI_STATIC_THREAD_MGMT_3:
4055         case SPI_PS_MAX_WAVE_ID:
4056         case SPI_CONFIG_CNTL:
4057         case SPI_CONFIG_CNTL_1:
4058         case TA_CNTL_AUX:
4059                 return true;
4060         default:
4061                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4062                 return false;
4063         }
4064 }
4065
4066 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4067                                   u32 *ib, struct radeon_cs_packet *pkt)
4068 {
4069         switch (pkt->opcode) {
4070         case PACKET3_NOP:
4071         case PACKET3_SET_BASE:
4072         case PACKET3_SET_CE_DE_COUNTERS:
4073         case PACKET3_LOAD_CONST_RAM:
4074         case PACKET3_WRITE_CONST_RAM:
4075         case PACKET3_WRITE_CONST_RAM_OFFSET:
4076         case PACKET3_DUMP_CONST_RAM:
4077         case PACKET3_INCREMENT_CE_COUNTER:
4078         case PACKET3_WAIT_ON_DE_COUNTER:
4079         case PACKET3_CE_WRITE:
4080                 break;
4081         default:
4082                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4083                 return -EINVAL;
4084         }
4085         return 0;
4086 }
4087
4088 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4089 {
4090         u32 start_reg, reg, i;
4091         u32 command = ib[idx + 4];
4092         u32 info = ib[idx + 1];
4093         u32 idx_value = ib[idx];
4094         if (command & PACKET3_CP_DMA_CMD_SAS) {
4095                 /* src address space is register */
4096                 if (((info & 0x60000000) >> 29) == 0) {
4097                         start_reg = idx_value << 2;
4098                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4099                                 reg = start_reg;
4100                                 if (!si_vm_reg_valid(reg)) {
4101                                         DRM_ERROR("CP DMA Bad SRC register\n");
4102                                         return -EINVAL;
4103                                 }
4104                         } else {
4105                                 for (i = 0; i < (command & 0x1fffff); i++) {
4106                                         reg = start_reg + (4 * i);
4107                                         if (!si_vm_reg_valid(reg)) {
4108                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4109                                                 return -EINVAL;
4110                                         }
4111                                 }
4112                         }
4113                 }
4114         }
4115         if (command & PACKET3_CP_DMA_CMD_DAS) {
4116                 /* dst address space is register */
4117                 if (((info & 0x00300000) >> 20) == 0) {
4118                         start_reg = ib[idx + 2];
4119                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4120                                 reg = start_reg;
4121                                 if (!si_vm_reg_valid(reg)) {
4122                                         DRM_ERROR("CP DMA Bad DST register\n");
4123                                         return -EINVAL;
4124                                 }
4125                         } else {
4126                                 for (i = 0; i < (command & 0x1fffff); i++) {
4127                                         reg = start_reg + (4 * i);
4128                                         if (!si_vm_reg_valid(reg)) {
4129                                                 DRM_ERROR("CP DMA Bad DST register\n");
4130                                                 return -EINVAL;
4131                                         }
4132                                 }
4133                         }
4134                 }
4135         }
4136         return 0;
4137 }
4138
4139 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4140                                    u32 *ib, struct radeon_cs_packet *pkt)
4141 {
4142         int r;
4143         u32 idx = pkt->idx + 1;
4144         u32 idx_value = ib[idx];
4145         u32 start_reg, end_reg, reg, i;
4146
4147         switch (pkt->opcode) {
4148         case PACKET3_NOP:
4149         case PACKET3_SET_BASE:
4150         case PACKET3_CLEAR_STATE:
4151         case PACKET3_INDEX_BUFFER_SIZE:
4152         case PACKET3_DISPATCH_DIRECT:
4153         case PACKET3_DISPATCH_INDIRECT:
4154         case PACKET3_ALLOC_GDS:
4155         case PACKET3_WRITE_GDS_RAM:
4156         case PACKET3_ATOMIC_GDS:
4157         case PACKET3_ATOMIC:
4158         case PACKET3_OCCLUSION_QUERY:
4159         case PACKET3_SET_PREDICATION:
4160         case PACKET3_COND_EXEC:
4161         case PACKET3_PRED_EXEC:
4162         case PACKET3_DRAW_INDIRECT:
4163         case PACKET3_DRAW_INDEX_INDIRECT:
4164         case PACKET3_INDEX_BASE:
4165         case PACKET3_DRAW_INDEX_2:
4166         case PACKET3_CONTEXT_CONTROL:
4167         case PACKET3_INDEX_TYPE:
4168         case PACKET3_DRAW_INDIRECT_MULTI:
4169         case PACKET3_DRAW_INDEX_AUTO:
4170         case PACKET3_DRAW_INDEX_IMMD:
4171         case PACKET3_NUM_INSTANCES:
4172         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4173         case PACKET3_STRMOUT_BUFFER_UPDATE:
4174         case PACKET3_DRAW_INDEX_OFFSET_2:
4175         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4176         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4177         case PACKET3_MPEG_INDEX:
4178         case PACKET3_WAIT_REG_MEM:
4179         case PACKET3_MEM_WRITE:
4180         case PACKET3_PFP_SYNC_ME:
4181         case PACKET3_SURFACE_SYNC:
4182         case PACKET3_EVENT_WRITE:
4183         case PACKET3_EVENT_WRITE_EOP:
4184         case PACKET3_EVENT_WRITE_EOS:
4185         case PACKET3_SET_CONTEXT_REG:
4186         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4187         case PACKET3_SET_SH_REG:
4188         case PACKET3_SET_SH_REG_OFFSET:
4189         case PACKET3_INCREMENT_DE_COUNTER:
4190         case PACKET3_WAIT_ON_CE_COUNTER:
4191         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4192         case PACKET3_ME_WRITE:
4193                 break;
4194         case PACKET3_COPY_DATA:
4195                 if ((idx_value & 0xf00) == 0) {
4196                         reg = ib[idx + 3] * 4;
4197                         if (!si_vm_reg_valid(reg))
4198                                 return -EINVAL;
4199                 }
4200                 break;
4201         case PACKET3_WRITE_DATA:
4202                 if ((idx_value & 0xf00) == 0) {
4203                         start_reg = ib[idx + 1] * 4;
4204                         if (idx_value & 0x10000) {
4205                                 if (!si_vm_reg_valid(start_reg))
4206                                         return -EINVAL;
4207                         } else {
4208                                 for (i = 0; i < (pkt->count - 2); i++) {
4209                                         reg = start_reg + (4 * i);
4210                                         if (!si_vm_reg_valid(reg))
4211                                                 return -EINVAL;
4212                                 }
4213                         }
4214                 }
4215                 break;
4216         case PACKET3_COND_WRITE:
4217                 if (idx_value & 0x100) {
4218                         reg = ib[idx + 5] * 4;
4219                         if (!si_vm_reg_valid(reg))
4220                                 return -EINVAL;
4221                 }
4222                 break;
4223         case PACKET3_COPY_DW:
4224                 if (idx_value & 0x2) {
4225                         reg = ib[idx + 3] * 4;
4226                         if (!si_vm_reg_valid(reg))
4227                                 return -EINVAL;
4228                 }
4229                 break;
4230         case PACKET3_SET_CONFIG_REG:
4231                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4232                 end_reg = 4 * pkt->count + start_reg - 4;
4233                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4234                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4235                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4236                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4237                         return -EINVAL;
4238                 }
4239                 for (i = 0; i < pkt->count; i++) {
4240                         reg = start_reg + (4 * i);
4241                         if (!si_vm_reg_valid(reg))
4242                                 return -EINVAL;
4243                 }
4244                 break;
4245         case PACKET3_CP_DMA:
4246                 r = si_vm_packet3_cp_dma_check(ib, idx);
4247                 if (r)
4248                         return r;
4249                 break;
4250         default:
4251                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4252                 return -EINVAL;
4253         }
4254         return 0;
4255 }
4256
4257 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4258                                        u32 *ib, struct radeon_cs_packet *pkt)
4259 {
4260         int r;
4261         u32 idx = pkt->idx + 1;
4262         u32 idx_value = ib[idx];
4263         u32 start_reg, reg, i;
4264
4265         switch (pkt->opcode) {
4266         case PACKET3_NOP:
4267         case PACKET3_SET_BASE:
4268         case PACKET3_CLEAR_STATE:
4269         case PACKET3_DISPATCH_DIRECT:
4270         case PACKET3_DISPATCH_INDIRECT:
4271         case PACKET3_ALLOC_GDS:
4272         case PACKET3_WRITE_GDS_RAM:
4273         case PACKET3_ATOMIC_GDS:
4274         case PACKET3_ATOMIC:
4275         case PACKET3_OCCLUSION_QUERY:
4276         case PACKET3_SET_PREDICATION:
4277         case PACKET3_COND_EXEC:
4278         case PACKET3_PRED_EXEC:
4279         case PACKET3_CONTEXT_CONTROL:
4280         case PACKET3_STRMOUT_BUFFER_UPDATE:
4281         case PACKET3_WAIT_REG_MEM:
4282         case PACKET3_MEM_WRITE:
4283         case PACKET3_PFP_SYNC_ME:
4284         case PACKET3_SURFACE_SYNC:
4285         case PACKET3_EVENT_WRITE:
4286         case PACKET3_EVENT_WRITE_EOP:
4287         case PACKET3_EVENT_WRITE_EOS:
4288         case PACKET3_SET_CONTEXT_REG:
4289         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4290         case PACKET3_SET_SH_REG:
4291         case PACKET3_SET_SH_REG_OFFSET:
4292         case PACKET3_INCREMENT_DE_COUNTER:
4293         case PACKET3_WAIT_ON_CE_COUNTER:
4294         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4295         case PACKET3_ME_WRITE:
4296                 break;
4297         case PACKET3_COPY_DATA:
4298                 if ((idx_value & 0xf00) == 0) {
4299                         reg = ib[idx + 3] * 4;
4300                         if (!si_vm_reg_valid(reg))
4301                                 return -EINVAL;
4302                 }
4303                 break;
4304         case PACKET3_WRITE_DATA:
4305                 if ((idx_value & 0xf00) == 0) {
4306                         start_reg = ib[idx + 1] * 4;
4307                         if (idx_value & 0x10000) {
4308                                 if (!si_vm_reg_valid(start_reg))
4309                                         return -EINVAL;
4310                         } else {
4311                                 for (i = 0; i < (pkt->count - 2); i++) {
4312                                         reg = start_reg + (4 * i);
4313                                         if (!si_vm_reg_valid(reg))
4314                                                 return -EINVAL;
4315                                 }
4316                         }
4317                 }
4318                 break;
4319         case PACKET3_COND_WRITE:
4320                 if (idx_value & 0x100) {
4321                         reg = ib[idx + 5] * 4;
4322                         if (!si_vm_reg_valid(reg))
4323                                 return -EINVAL;
4324                 }
4325                 break;
4326         case PACKET3_COPY_DW:
4327                 if (idx_value & 0x2) {
4328                         reg = ib[idx + 3] * 4;
4329                         if (!si_vm_reg_valid(reg))
4330                                 return -EINVAL;
4331                 }
4332                 break;
4333         case PACKET3_CP_DMA:
4334                 r = si_vm_packet3_cp_dma_check(ib, idx);
4335                 if (r)
4336                         return r;
4337                 break;
4338         default:
4339                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4340                 return -EINVAL;
4341         }
4342         return 0;
4343 }
4344
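     /**
      * si_ib_parse - validate an indirect buffer from a VM client
      *
      * @rdev: radeon_device pointer
      * @ib: radeon_ib structure holding the indirect buffer
      *
      * Walk the packets in the IB and check them against the CE, GFX or
      * compute rules depending on the target ring (SI).
      * Returns 0 if the IB is valid, error if a packet is rejected.
      */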
4345 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4346 {
4347         int ret = 0;
4348         u32 idx = 0;
4349         struct radeon_cs_packet pkt;
4350
4351         do {
4352                 pkt.idx = idx;
4353                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4354                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4355                 pkt.one_reg_wr = 0;
4356                 switch (pkt.type) {
4357                 case RADEON_PACKET_TYPE0:
4358                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4359                         ret = -EINVAL;
4360                         break;
4361                 case RADEON_PACKET_TYPE2:
4362                         idx += 1;
4363                         break;
4364                 case RADEON_PACKET_TYPE3:
4365                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4366                         if (ib->is_const_ib)
4367                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4368                         else {
4369                                 switch (ib->ring) {
4370                                 case RADEON_RING_TYPE_GFX_INDEX:
4371                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4372                                         break;
4373                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4374                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4375                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4376                                         break;
4377                                 default:
4378                                         dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4379                                         ret = -EINVAL;
4380                                         break;
4381                                 }
4382                         }
4383                         idx += pkt.count + 2;
4384                         break;
4385                 default:
4386                         dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4387                         ret = -EINVAL;
4388                         break;
4389                 }
4390                 if (ret)
4391                         break;
4392         } while (idx < ib->length_dw);
4393
4394         return ret;
4395 }
4396
4397 /*
4398  * vm
4399  */
4400 int si_vm_init(struct radeon_device *rdev)
4401 {
4402         /* number of VMs */
4403         rdev->vm_manager.nvm = 16;
4404         /* base offset of vram pages */
4405         rdev->vm_manager.vram_base_offset = 0;
4406
4407         return 0;
4408 }
4409
4410 void si_vm_fini(struct radeon_device *rdev)
4411 {
4412 }
4413
4414 /**
4415  * si_vm_decode_fault - print human readable fault info
4416  *
4417  * @rdev: radeon_device pointer
4418  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4419  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4420  *
4421  * Print human readable fault information (SI).
4422  */
4423 static void si_vm_decode_fault(struct radeon_device *rdev,
4424                                u32 status, u32 addr)
4425 {
4426         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4427         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4428         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4429         char *block;
4430
4431         if (rdev->family == CHIP_TAHITI) {
4432                 switch (mc_id) {
4433                 case 160:
4434                 case 144:
4435                 case 96:
4436                 case 80:
4437                 case 224:
4438                 case 208:
4439                 case 32:
4440                 case 16:
4441                         block = "CB";
4442                         break;
4443                 case 161:
4444                 case 145:
4445                 case 97:
4446                 case 81:
4447                 case 225:
4448                 case 209:
4449                 case 33:
4450                 case 17:
4451                         block = "CB_FMASK";
4452                         break;
4453                 case 162:
4454                 case 146:
4455                 case 98:
4456                 case 82:
4457                 case 226:
4458                 case 210:
4459                 case 34:
4460                 case 18:
4461                         block = "CB_CMASK";
4462                         break;
4463                 case 163:
4464                 case 147:
4465                 case 99:
4466                 case 83:
4467                 case 227:
4468                 case 211:
4469                 case 35:
4470                 case 19:
4471                         block = "CB_IMMED";
4472                         break;
4473                 case 164:
4474                 case 148:
4475                 case 100:
4476                 case 84:
4477                 case 228:
4478                 case 212:
4479                 case 36:
4480                 case 20:
4481                         block = "DB";
4482                         break;
4483                 case 165:
4484                 case 149:
4485                 case 101:
4486                 case 85:
4487                 case 229:
4488                 case 213:
4489                 case 37:
4490                 case 21:
4491                         block = "DB_HTILE";
4492                         break;
4493                 case 167:
4494                 case 151:
4495                 case 103:
4496                 case 87:
4497                 case 231:
4498                 case 215:
4499                 case 39:
4500                 case 23:
4501                         block = "DB_STEN";
4502                         break;
4503                 case 72:
4504                 case 68:
4505                 case 64:
4506                 case 8:
4507                 case 4:
4508                 case 0:
4509                 case 136:
4510                 case 132:
4511                 case 128:
4512                 case 200:
4513                 case 196:
4514                 case 192:
4515                         block = "TC";
4516                         break;
4517                 case 112:
4518                 case 48:
4519                         block = "CP";
4520                         break;
4521                 case 49:
4522                 case 177:
4523                 case 50:
4524                 case 178:
4525                         block = "SH";
4526                         break;
4527                 case 53:
4528                 case 190:
4529                         block = "VGT";
4530                         break;
4531                 case 117:
4532                         block = "IH";
4533                         break;
4534                 case 51:
4535                 case 115:
4536                         block = "RLC";
4537                         break;
4538                 case 119:
4539                 case 183:
4540                         block = "DMA0";
4541                         break;
4542                 case 61:
4543                         block = "DMA1";
4544                         break;
4545                 case 248:
4546                 case 120:
4547                         block = "HDP";
4548                         break;
4549                 default:
4550                         block = "unknown";
4551                         break;
4552                 }
4553         } else {
4554                 switch (mc_id) {
4555                 case 32:
4556                 case 16:
4557                 case 96:
4558                 case 80:
4559                 case 160:
4560                 case 144:
4561                 case 224:
4562                 case 208:
4563                         block = "CB";
4564                         break;
4565                 case 33:
4566                 case 17:
4567                 case 97:
4568                 case 81:
4569                 case 161:
4570                 case 145:
4571                 case 225:
4572                 case 209:
4573                         block = "CB_FMASK";
4574                         break;
4575                 case 34:
4576                 case 18:
4577                 case 98:
4578                 case 82:
4579                 case 162:
4580                 case 146:
4581                 case 226:
4582                 case 210:
4583                         block = "CB_CMASK";
4584                         break;
4585                 case 35:
4586                 case 19:
4587                 case 99:
4588                 case 83:
4589                 case 163:
4590                 case 147:
4591                 case 227:
4592                 case 211:
4593                         block = "CB_IMMED";
4594                         break;
4595                 case 36:
4596                 case 20:
4597                 case 100:
4598                 case 84:
4599                 case 164:
4600                 case 148:
4601                 case 228:
4602                 case 212:
4603                         block = "DB";
4604                         break;
4605                 case 37:
4606                 case 21:
4607                 case 101:
4608                 case 85:
4609                 case 165:
4610                 case 149:
4611                 case 229:
4612                 case 213:
4613                         block = "DB_HTILE";
4614                         break;
4615                 case 39:
4616                 case 23:
4617                 case 103:
4618                 case 87:
4619                 case 167:
4620                 case 151:
4621                 case 231:
4622                 case 215:
4623                         block = "DB_STEN";
4624                         break;
4625                 case 72:
4626                 case 68:
4627                 case 8:
4628                 case 4:
4629                 case 136:
4630                 case 132:
4631                 case 200:
4632                 case 196:
4633                         block = "TC";
4634                         break;
4635                 case 112:
4636                 case 48:
4637                         block = "CP";
4638                         break;
4639                 case 49:
4640                 case 177:
4641                 case 50:
4642                 case 178:
4643                         block = "SH";
4644                         break;
4645                 case 53:
4646                         block = "VGT";
4647                         break;
4648                 case 117:
4649                         block = "IH";
4650                         break;
4651                 case 51:
4652                 case 115:
4653                         block = "RLC";
4654                         break;
4655                 case 119:
4656                 case 183:
4657                         block = "DMA0";
4658                         break;
4659                 case 61:
4660                         block = "DMA1";
4661                         break;
4662                 case 248:
4663                 case 120:
4664                         block = "HDP";
4665                         break;
4666                 default:
4667                         block = "unknown";
4668                         break;
4669                 }
4670         }
4671
4672         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4673                protections, vmid, addr,
4674                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4675                block, mc_id);
4676 }
4677
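     /**
      * si_vm_flush - flush the TLB for a specific VM
      *
      * @rdev: radeon_device pointer
      * @ridx: index of the ring to emit the flush on
      * @vm: radeon_vm pointer
      *
      * Write the new page directory base address for the VM, flush the HDP
      * cache and invalidate the TLB entry for the VM id via the ring (SI).
      */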
4678 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4679 {
4680         struct radeon_ring *ring = &rdev->ring[ridx];
4681
4682         if (vm == NULL)
4683                 return;
4684
4685         /* write new base address */
4686         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4687         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4688                                  WRITE_DATA_DST_SEL(0)));
4689
4690         if (vm->id < 8) {
4691                 radeon_ring_write(ring,
4692                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4693         } else {
4694                 radeon_ring_write(ring,
4695                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4696         }
4697         radeon_ring_write(ring, 0);
4698         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4699
4700         /* flush hdp cache */
4701         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4702         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4703                                  WRITE_DATA_DST_SEL(0)));
4704         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4705         radeon_ring_write(ring, 0);
4706         radeon_ring_write(ring, 0x1);
4707
4708         /* bits 0-15 are the VM contexts 0-15 */
4709         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4710         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4711                                  WRITE_DATA_DST_SEL(0)));
4712         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4713         radeon_ring_write(ring, 0);
4714         radeon_ring_write(ring, 1 << vm->id);
4715
4716         /* sync PFP to ME, otherwise we might get invalid PFP reads */
4717         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4718         radeon_ring_write(ring, 0x0);
4719 }
4720
4721 /*
4722  *  Power and clock gating
4723  */
4724 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4725 {
4726         int i;
4727
4728         for (i = 0; i < rdev->usec_timeout; i++) {
4729                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4730                         break;
4731                 udelay(1);
4732         }
4733
4734         for (i = 0; i < rdev->usec_timeout; i++) {
4735                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4736                         break;
4737                 udelay(1);
4738         }
4739 }
4740
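
     /**
      * si_enable_gui_idle_interrupt - toggle the CP context busy/empty interrupts
      *
      * @rdev: radeon_device pointer
      * @enable: enable/disable the interrupts
      *
      * When disabling, also wait for the RLC to report the gfx block as
      * clocked and powered up before returning (SI).
      */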
4741 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4742                                          bool enable)
4743 {
4744         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4745         u32 mask;
4746         int i;
4747
4748         if (enable)
4749                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4750         else
4751                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4752         WREG32(CP_INT_CNTL_RING0, tmp);
4753
4754         if (!enable) {
4755                 /* read a gfx register */
4756                 tmp = RREG32(DB_DEPTH_INFO);
4757
4758                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4759                 for (i = 0; i < rdev->usec_timeout; i++) {
4760                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4761                                 break;
4762                         udelay(1);
4763                 }
4764         }
4765 }
4766
4767 static void si_set_uvd_dcm(struct radeon_device *rdev,
4768                            bool sw_mode)
4769 {
4770         u32 tmp, tmp2;
4771
4772         tmp = RREG32(UVD_CGC_CTRL);
4773         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4774         tmp |= DCM | CG_DT(1) | CLK_OD(4);
4775
4776         if (sw_mode) {
4777                 tmp &= ~0x7ffff800;
4778                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4779         } else {
4780                 tmp |= 0x7ffff800;
4781                 tmp2 = 0;
4782         }
4783
4784         WREG32(UVD_CGC_CTRL, tmp);
4785         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4786 }
4787
4788 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4789 {
4790         bool hw_mode = true;
4791
4792         if (hw_mode) {
4793                 si_set_uvd_dcm(rdev, false);
4794         } else {
4795                 u32 tmp = RREG32(UVD_CGC_CTRL);
4796                 tmp &= ~DCM;
4797                 WREG32(UVD_CGC_CTRL, tmp);
4798         }
4799 }
4800
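
     /**
      * si_halt_rlc - stop the RLC
      *
      * @rdev: radeon_device pointer
      *
      * Disable the RLC and wait for the serdes to go idle (SI).
      * Returns the original RLC_CNTL value so it can be restored later.
      */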
4801 static u32 si_halt_rlc(struct radeon_device *rdev)
4802 {
4803         u32 data, orig;
4804
4805         orig = data = RREG32(RLC_CNTL);
4806
4807         if (data & RLC_ENABLE) {
4808                 data &= ~RLC_ENABLE;
4809                 WREG32(RLC_CNTL, data);
4810
4811                 si_wait_for_rlc_serdes(rdev);
4812         }
4813
4814         return orig;
4815 }
4816
4817 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4818 {
4819         u32 tmp;
4820
4821         tmp = RREG32(RLC_CNTL);
4822         if (tmp != rlc)
4823                 WREG32(RLC_CNTL, rlc);
4824 }
4825
4826 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4827 {
4828         u32 data, orig;
4829
4830         orig = data = RREG32(DMA_PG);
4831         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4832                 data |= PG_CNTL_ENABLE;
4833         else
4834                 data &= ~PG_CNTL_ENABLE;
4835         if (orig != data)
4836                 WREG32(DMA_PG, data);
4837 }
4838
4839 static void si_init_dma_pg(struct radeon_device *rdev)
4840 {
4841         u32 tmp;
4842
4843         WREG32(DMA_PGFSM_WRITE,  0x00002000);
4844         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4845
4846         for (tmp = 0; tmp < 5; tmp++)
4847                 WREG32(DMA_PGFSM_WRITE, 0);
4848 }
4849
4850 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4851                                bool enable)
4852 {
4853         u32 tmp;
4854
4855         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
4856                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4857                 WREG32(RLC_TTOP_D, tmp);
4858
4859                 tmp = RREG32(RLC_PG_CNTL);
4860                 tmp |= GFX_PG_ENABLE;
4861                 WREG32(RLC_PG_CNTL, tmp);
4862
4863                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4864                 tmp |= AUTO_PG_EN;
4865                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4866         } else {
4867                 tmp = RREG32(RLC_AUTO_PG_CTRL);
4868                 tmp &= ~AUTO_PG_EN;
4869                 WREG32(RLC_AUTO_PG_CTRL, tmp);
4870
4871                 tmp = RREG32(DB_RENDER_CONTROL);
4872         }
4873 }
4874
4875 static void si_init_gfx_cgpg(struct radeon_device *rdev)
4876 {
4877         u32 tmp;
4878
4879         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4880
4881         tmp = RREG32(RLC_PG_CNTL);
4882         tmp |= GFX_PG_SRC;
4883         WREG32(RLC_PG_CNTL, tmp);
4884
4885         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4886
4887         tmp = RREG32(RLC_AUTO_PG_CTRL);
4888
4889         tmp &= ~GRBM_REG_SGIT_MASK;
4890         tmp |= GRBM_REG_SGIT(0x700);
4891         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
4892         WREG32(RLC_AUTO_PG_CTRL, tmp);
4893 }
4894
4895 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4896 {
4897         u32 mask = 0, tmp, tmp1;
4898         int i;
4899
4900         si_select_se_sh(rdev, se, sh);
4901         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4902         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4903         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4904
4905         tmp &= 0xffff0000;
4906
4907         tmp |= tmp1;
4908         tmp >>= 16;
4909
4910         for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
4911                 mask <<= 1;
4912                 mask |= 1;
4913         }
4914
4915         return (~tmp) & mask;
4916 }
4917
4918 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4919 {
4920         u32 i, j, k, active_cu_number = 0;
4921         u32 mask, counter, cu_bitmap;
4922         u32 tmp = 0;
4923
4924         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4925                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4926                         mask = 1;
4927                         cu_bitmap = 0;
4928                         counter  = 0;
4929                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4930                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4931                                         if (counter < 2)
4932                                                 cu_bitmap |= mask;
4933                                         counter++;
4934                                 }
4935                                 mask <<= 1;
4936                         }
4937
4938                         active_cu_number += counter;
4939                         tmp |= (cu_bitmap << (i * 16 + j * 8));
4940                 }
4941         }
4942
4943         WREG32(RLC_PG_AO_CU_MASK, tmp);
4944
4945         tmp = RREG32(RLC_MAX_PG_CU);
4946         tmp &= ~MAX_PU_CU_MASK;
4947         tmp |= MAX_PU_CU(active_cu_number);
4948         WREG32(RLC_MAX_PG_CU, tmp);
4949 }
4950
4951 static void si_enable_cgcg(struct radeon_device *rdev,
4952                            bool enable)
4953 {
4954         u32 data, orig, tmp;
4955
4956         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
4957
4958         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
4959                 si_enable_gui_idle_interrupt(rdev, true);
4960
4961                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
4962
4963                 tmp = si_halt_rlc(rdev);
4964
4965                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
4966                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
4967                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
4968
4969                 si_wait_for_rlc_serdes(rdev);
4970
4971                 si_update_rlc(rdev, tmp);
4972
4973                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
4974
4975                 data |= CGCG_EN | CGLS_EN;
4976         } else {
4977                 si_enable_gui_idle_interrupt(rdev, false);
4978
4979                 RREG32(CB_CGTT_SCLK_CTRL);
4980                 RREG32(CB_CGTT_SCLK_CTRL);
4981                 RREG32(CB_CGTT_SCLK_CTRL);
4982                 RREG32(CB_CGTT_SCLK_CTRL);
4983
4984                 data &= ~(CGCG_EN | CGLS_EN);
4985         }
4986
4987         if (orig != data)
4988                 WREG32(RLC_CGCG_CGLS_CTRL, data);
4989 }
4990
4991 static void si_enable_mgcg(struct radeon_device *rdev,
4992                            bool enable)
4993 {
4994         u32 data, orig, tmp = 0;
4995
4996         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
4997                 orig = data = RREG32(CGTS_SM_CTRL_REG);
4998                 data = 0x96940200;
4999                 if (orig != data)
5000                         WREG32(CGTS_SM_CTRL_REG, data);
5001
5002                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5003                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5004                         data |= CP_MEM_LS_EN;
5005                         if (orig != data)
5006                                 WREG32(CP_MEM_SLP_CNTL, data);
5007                 }
5008
5009                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5010                 data &= 0xffffffc0;
5011                 if (orig != data)
5012                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5013
5014                 tmp = si_halt_rlc(rdev);
5015
5016                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5017                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5018                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5019
5020                 si_update_rlc(rdev, tmp);
5021         } else {
5022                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5023                 data |= 0x00000003;
5024                 if (orig != data)
5025                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5026
5027                 data = RREG32(CP_MEM_SLP_CNTL);
5028                 if (data & CP_MEM_LS_EN) {
5029                         data &= ~CP_MEM_LS_EN;
5030                         WREG32(CP_MEM_SLP_CNTL, data);
5031                 }
5032                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5033                 data |= LS_OVERRIDE | OVERRIDE;
5034                 if (orig != data)
5035                         WREG32(CGTS_SM_CTRL_REG, data);
5036
5037                 tmp = si_halt_rlc(rdev);
5038
5039                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5040                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5041                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5042
5043                 si_update_rlc(rdev, tmp);
5044         }
5045 }
5046
5047 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5048                                bool enable)
5049 {
5050         u32 orig, data, tmp;
5051
5052         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5053                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5054                 tmp |= 0x3fff;
5055                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5056
5057                 orig = data = RREG32(UVD_CGC_CTRL);
5058                 data |= DCM;
5059                 if (orig != data)
5060                         WREG32(UVD_CGC_CTRL, data);
5061
5062                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5063                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5064         } else {
5065                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5066                 tmp &= ~0x3fff;
5067                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5068
5069                 orig = data = RREG32(UVD_CGC_CTRL);
5070                 data &= ~DCM;
5071                 if (orig != data)
5072                         WREG32(UVD_CGC_CTRL, data);
5073
5074                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5075                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5076         }
5077 }
5078
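/* MC/ATC/VM registers carrying the clock gating and light sleep enable bits
 * toggled by si_enable_mc_ls() and si_enable_mc_mgcg() below.
 */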
5079 static const u32 mc_cg_registers[] =
5080 {
5081         MC_HUB_MISC_HUB_CG,
5082         MC_HUB_MISC_SIP_CG,
5083         MC_HUB_MISC_VM_CG,
5084         MC_XPB_CLK_GAT,
5085         ATC_MISC_CG,
5086         MC_CITF_MISC_WR_CG,
5087         MC_CITF_MISC_RD_CG,
5088         MC_CITF_MISC_VM_CG,
5089         VM_L2_CG,
5090 };
5091
5092 static void si_enable_mc_ls(struct radeon_device *rdev,
5093                             bool enable)
5094 {
5095         int i;
5096         u32 orig, data;
5097
5098         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5099                 orig = data = RREG32(mc_cg_registers[i]);
5100                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5101                         data |= MC_LS_ENABLE;
5102                 else
5103                         data &= ~MC_LS_ENABLE;
5104                 if (data != orig)
5105                         WREG32(mc_cg_registers[i], data);
5106         }
5107 }
5108
5109 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5110                                bool enable)
5111 {
5112         int i;
5113         u32 orig, data;
5114
5115         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5116                 orig = data = RREG32(mc_cg_registers[i]);
5117                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5118                         data |= MC_CG_ENABLE;
5119                 else
5120                         data &= ~MC_CG_ENABLE;
5121                 if (data != orig)
5122                         WREG32(mc_cg_registers[i], data);
5123         }
5124 }
5125
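/*
 * DMA (SDMA) clock gating: when enabling, drop the memory power override and
 * let the engine clocks gate via DMA_CLK_CTRL; when disabling, force the
 * override and keep the DMA clocks running.
 */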
5126 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5127                                bool enable)
5128 {
5129         u32 orig, data, offset;
5130         int i;
5131
5132         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5133                 for (i = 0; i < 2; i++) {
5134                         if (i == 0)
5135                                 offset = DMA0_REGISTER_OFFSET;
5136                         else
5137                                 offset = DMA1_REGISTER_OFFSET;
5138                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5139                         data &= ~MEM_POWER_OVERRIDE;
5140                         if (data != orig)
5141                                 WREG32(DMA_POWER_CNTL + offset, data);
5142                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5143                 }
5144         } else {
5145                 for (i = 0; i < 2; i++) {
5146                         if (i == 0)
5147                                 offset = DMA0_REGISTER_OFFSET;
5148                         else
5149                                 offset = DMA1_REGISTER_OFFSET;
5150                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5151                         data |= MEM_POWER_OVERRIDE;
5152                         if (data != orig)
5153                                 WREG32(DMA_POWER_CNTL + offset, data);
5154
5155                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5156                         data = 0xff000000;
5157                         if (data != orig)
5158                                 WREG32(DMA_CLK_CTRL + offset, data);
5159                 }
5160         }
5161 }
5162
5163 static void si_enable_bif_mgls(struct radeon_device *rdev,
5164                                bool enable)
5165 {
5166         u32 orig, data;
5167
5168         orig = data = RREG32_PCIE(PCIE_CNTL2);
5169
5170         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5171                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5172                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5173         else
5174                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5175                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5176
5177         if (orig != data)
5178                 WREG32_PCIE(PCIE_CNTL2, data);
5179 }
5180
5181 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5182                                bool enable)
5183 {
5184         u32 orig, data;
5185
5186         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5187
5188         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5189                 data &= ~CLOCK_GATING_DIS;
5190         else
5191                 data |= CLOCK_GATING_DIS;
5192
5193         if (orig != data)
5194                 WREG32(HDP_HOST_PATH_CNTL, data);
5195 }
5196
5197 static void si_enable_hdp_ls(struct radeon_device *rdev,
5198                              bool enable)
5199 {
5200         u32 orig, data;
5201
5202         orig = data = RREG32(HDP_MEM_POWER_LS);
5203
5204         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5205                 data |= HDP_LS_ENABLE;
5206         else
5207                 data &= ~HDP_LS_ENABLE;
5208
5209         if (orig != data)
5210                 WREG32(HDP_MEM_POWER_LS, data);
5211 }
5212
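/*
 * Enable or disable clock gating for the requested IP blocks.  For GFX the
 * GUI idle interrupt is masked around the transition and MGCG/CGCG are
 * sequenced in the required order (MGCG before CGCG on enable, the reverse
 * on disable).
 */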
5213 void si_update_cg(struct radeon_device *rdev,
5214                   u32 block, bool enable)
5215 {
5216         if (block & RADEON_CG_BLOCK_GFX) {
5217                 si_enable_gui_idle_interrupt(rdev, false);
5218                 /* order matters! */
5219                 if (enable) {
5220                         si_enable_mgcg(rdev, true);
5221                         si_enable_cgcg(rdev, true);
5222                 } else {
5223                         si_enable_cgcg(rdev, false);
5224                         si_enable_mgcg(rdev, false);
5225                 }
5226                 si_enable_gui_idle_interrupt(rdev, true);
5227         }
5228
5229         if (block & RADEON_CG_BLOCK_MC) {
5230                 si_enable_mc_mgcg(rdev, enable);
5231                 si_enable_mc_ls(rdev, enable);
5232         }
5233
5234         if (block & RADEON_CG_BLOCK_SDMA) {
5235                 si_enable_dma_mgcg(rdev, enable);
5236         }
5237
5238         if (block & RADEON_CG_BLOCK_BIF) {
5239                 si_enable_bif_mgls(rdev, enable);
5240         }
5241
5242         if (block & RADEON_CG_BLOCK_UVD) {
5243                 if (rdev->has_uvd) {
5244                         si_enable_uvd_mgcg(rdev, enable);
5245                 }
5246         }
5247
5248         if (block & RADEON_CG_BLOCK_HDP) {
5249                 si_enable_hdp_mgcg(rdev, enable);
5250                 si_enable_hdp_ls(rdev, enable);
5251         }
5252 }
5253
5254 static void si_init_cg(struct radeon_device *rdev)
5255 {
5256         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5257                             RADEON_CG_BLOCK_MC |
5258                             RADEON_CG_BLOCK_SDMA |
5259                             RADEON_CG_BLOCK_BIF |
5260                             RADEON_CG_BLOCK_HDP), true);
5261         if (rdev->has_uvd) {
5262                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5263                 si_init_uvd_internal_cg(rdev);
5264         }
5265 }
5266
5267 static void si_fini_cg(struct radeon_device *rdev)
5268 {
5269         if (rdev->has_uvd) {
5270                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5271         }
5272         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5273                             RADEON_CG_BLOCK_MC |
5274                             RADEON_CG_BLOCK_SDMA |
5275                             RADEON_CG_BLOCK_BIF |
5276                             RADEON_CG_BLOCK_HDP), false);
5277 }
5278
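/*
 * Clear state buffer (CSB) helpers: si_get_csb_size() returns the number of
 * dwords needed for the RLC clear state buffer, and si_get_csb_buffer()
 * fills it with the preamble, the SET_CONTEXT_REG extents from rlc.cs_data,
 * the per-chip PA_SC_RASTER_CONFIG value and the trailing CLEAR_STATE packet.
 */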
5279 u32 si_get_csb_size(struct radeon_device *rdev)
5280 {
5281         u32 count = 0;
5282         const struct cs_section_def *sect = NULL;
5283         const struct cs_extent_def *ext = NULL;
5284
5285         if (rdev->rlc.cs_data == NULL)
5286                 return 0;
5287
5288         /* begin clear state */
5289         count += 2;
5290         /* context control state */
5291         count += 3;
5292
5293         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5294                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5295                         if (sect->id == SECT_CONTEXT)
5296                                 count += 2 + ext->reg_count;
5297                         else
5298                                 return 0;
5299                 }
5300         }
5301         /* pa_sc_raster_config */
5302         count += 3;
5303         /* end clear state */
5304         count += 2;
5305         /* clear state */
5306         count += 2;
5307
5308         return count;
5309 }
5310
5311 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5312 {
5313         u32 count = 0, i;
5314         const struct cs_section_def *sect = NULL;
5315         const struct cs_extent_def *ext = NULL;
5316
5317         if (rdev->rlc.cs_data == NULL)
5318                 return;
5319         if (buffer == NULL)
5320                 return;
5321
5322         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5323         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5324
5325         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5326         buffer[count++] = cpu_to_le32(0x80000000);
5327         buffer[count++] = cpu_to_le32(0x80000000);
5328
5329         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5330                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5331                         if (sect->id == SECT_CONTEXT) {
5332                                 buffer[count++] =
5333                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5334                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5335                                 for (i = 0; i < ext->reg_count; i++)
5336                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5337                         } else {
5338                                 return;
5339                         }
5340                 }
5341         }
5342
5343         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5344         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5345         switch (rdev->family) {
5346         case CHIP_TAHITI:
5347         case CHIP_PITCAIRN:
5348                 buffer[count++] = cpu_to_le32(0x2a00126a);
5349                 break;
5350         case CHIP_VERDE:
5351                 buffer[count++] = cpu_to_le32(0x0000124a);
5352                 break;
5353         case CHIP_OLAND:
5354                 buffer[count++] = cpu_to_le32(0x00000082);
5355                 break;
5356         case CHIP_HAINAN:
5357                 buffer[count++] = cpu_to_le32(0x00000000);
5358                 break;
5359         default:
5360                 buffer[count++] = cpu_to_le32(0x00000000);
5361                 break;
5362         }
5363
5364         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5365         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5366
5367         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5368         buffer[count++] = cpu_to_le32(0);
5369 }
5370
5371 static void si_init_pg(struct radeon_device *rdev)
5372 {
5373         if (rdev->pg_flags) {
5374                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5375                         si_init_dma_pg(rdev);
5376                 }
5377                 si_init_ao_cu_mask(rdev);
5378                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5379                         si_init_gfx_cgpg(rdev);
5380                 }
5381                 si_enable_dma_pg(rdev, true);
5382                 si_enable_gfx_cgpg(rdev, true);
5383         } else {
5384                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5385                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5386         }
5387 }
5388
5389 static void si_fini_pg(struct radeon_device *rdev)
5390 {
5391         if (rdev->pg_flags) {
5392                 si_enable_dma_pg(rdev, false);
5393                 si_enable_gfx_cgpg(rdev, false);
5394         }
5395 }
5396
5397 /*
5398  * RLC
5399  */
5400 void si_rlc_reset(struct radeon_device *rdev)
5401 {
5402         u32 tmp = RREG32(GRBM_SOFT_RESET);
5403
5404         tmp |= SOFT_RESET_RLC;
5405         WREG32(GRBM_SOFT_RESET, tmp);
5406         udelay(50);
5407         tmp &= ~SOFT_RESET_RLC;
5408         WREG32(GRBM_SOFT_RESET, tmp);
5409         udelay(50);
5410 }
5411
5412 static void si_rlc_stop(struct radeon_device *rdev)
5413 {
5414         WREG32(RLC_CNTL, 0);
5415
5416         si_enable_gui_idle_interrupt(rdev, false);
5417
5418         si_wait_for_rlc_serdes(rdev);
5419 }
5420
5421 static void si_rlc_start(struct radeon_device *rdev)
5422 {
5423         WREG32(RLC_CNTL, RLC_ENABLE);
5424
5425         si_enable_gui_idle_interrupt(rdev, true);
5426
5427         udelay(50);
5428 }
5429
5430 static bool si_lbpw_supported(struct radeon_device *rdev)
5431 {
5432         u32 tmp;
5433
5434         /* Enable LBPW only for DDR3 */
5435         tmp = RREG32(MC_SEQ_MISC0);
5436         if ((tmp & 0xF0000000) == 0xB0000000)
5437                 return true;
5438         return false;
5439 }
5440
5441 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5442 {
5443         u32 tmp;
5444
5445         tmp = RREG32(RLC_LB_CNTL);
5446         if (enable)
5447                 tmp |= LOAD_BALANCE_ENABLE;
5448         else
5449                 tmp &= ~LOAD_BALANCE_ENABLE;
5450         WREG32(RLC_LB_CNTL, tmp);
5451
5452         if (!enable) {
5453                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5454                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5455         }
5456 }
5457
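/*
 * Bring the RLC back up: stop and reset it, redo PG/CG init, clear the
 * load-balancing registers, upload the RLC microcode and restart it.
 */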
5458 static int si_rlc_resume(struct radeon_device *rdev)
5459 {
5460         u32 i;
5461         const __be32 *fw_data;
5462
5463         if (!rdev->rlc_fw)
5464                 return -EINVAL;
5465
5466         si_rlc_stop(rdev);
5467
5468         si_rlc_reset(rdev);
5469
5470         si_init_pg(rdev);
5471
5472         si_init_cg(rdev);
5473
5474         WREG32(RLC_RL_BASE, 0);
5475         WREG32(RLC_RL_SIZE, 0);
5476         WREG32(RLC_LB_CNTL, 0);
5477         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5478         WREG32(RLC_LB_CNTR_INIT, 0);
5479         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5480
5481         WREG32(RLC_MC_CNTL, 0);
5482         WREG32(RLC_UCODE_CNTL, 0);
5483
5484         fw_data = (const __be32 *)rdev->rlc_fw->data;
5485         for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5486                 WREG32(RLC_UCODE_ADDR, i);
5487                 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5488         }
5489         WREG32(RLC_UCODE_ADDR, 0);
5490
5491         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5492
5493         si_rlc_start(rdev);
5494
5495         return 0;
5496 }
5497
5498 static void si_enable_interrupts(struct radeon_device *rdev)
5499 {
5500         u32 ih_cntl = RREG32(IH_CNTL);
5501         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5502
5503         ih_cntl |= ENABLE_INTR;
5504         ih_rb_cntl |= IH_RB_ENABLE;
5505         WREG32(IH_CNTL, ih_cntl);
5506         WREG32(IH_RB_CNTL, ih_rb_cntl);
5507         rdev->ih.enabled = true;
5508 }
5509
5510 static void si_disable_interrupts(struct radeon_device *rdev)
5511 {
5512         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5513         u32 ih_cntl = RREG32(IH_CNTL);
5514
5515         ih_rb_cntl &= ~IH_RB_ENABLE;
5516         ih_cntl &= ~ENABLE_INTR;
5517         WREG32(IH_RB_CNTL, ih_rb_cntl);
5518         WREG32(IH_CNTL, ih_cntl);
5519         /* set rptr, wptr to 0 */
5520         WREG32(IH_RB_RPTR, 0);
5521         WREG32(IH_RB_WPTR, 0);
5522         rdev->ih.enabled = false;
5523         rdev->ih.rptr = 0;
5524 }
5525
5526 static void si_disable_interrupt_state(struct radeon_device *rdev)
5527 {
5528         u32 tmp;
5529
5530         tmp = RREG32(CP_INT_CNTL_RING0) &
5531                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5532         WREG32(CP_INT_CNTL_RING0, tmp);
5533         WREG32(CP_INT_CNTL_RING1, 0);
5534         WREG32(CP_INT_CNTL_RING2, 0);
5535         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5536         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5537         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5538         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5539         WREG32(GRBM_INT_CNTL, 0);
5540         if (rdev->num_crtc >= 2) {
5541                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5542                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5543         }
5544         if (rdev->num_crtc >= 4) {
5545                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5546                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5547         }
5548         if (rdev->num_crtc >= 6) {
5549                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5550                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5551         }
5552
5553         if (rdev->num_crtc >= 2) {
5554                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5555                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5556         }
5557         if (rdev->num_crtc >= 4) {
5558                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5559                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5560         }
5561         if (rdev->num_crtc >= 6) {
5562                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5563                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5564         }
5565
5566         if (!ASIC_IS_NODCE(rdev)) {
5567                 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5568
5569                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5570                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5571                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5572                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5573                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5574                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5575                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5576                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5577                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5578                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5579                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5580                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5581         }
5582 }
5583
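/*
 * One-time IH setup: allocate the IH ring, resume the RLC, program the ring
 * base/size and optional write-back wptr, then enable interrupts with all
 * sources masked.
 */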
5584 static int si_irq_init(struct radeon_device *rdev)
5585 {
5586         int ret = 0;
5587         int rb_bufsz;
5588         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5589
5590         /* allocate ring */
5591         ret = r600_ih_ring_alloc(rdev);
5592         if (ret)
5593                 return ret;
5594
5595         /* disable irqs */
5596         si_disable_interrupts(rdev);
5597
5598         /* init rlc */
5599         ret = si_rlc_resume(rdev);
5600         if (ret) {
5601                 r600_ih_ring_fini(rdev);
5602                 return ret;
5603         }
5604
5605         /* setup interrupt control */
5606         /* set dummy read address to ring address */
5607         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5608         interrupt_cntl = RREG32(INTERRUPT_CNTL);
5609         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5610          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5611          */
5612         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5613         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5614         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5615         WREG32(INTERRUPT_CNTL, interrupt_cntl);
5616
5617         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5618         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5619
5620         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5621                       IH_WPTR_OVERFLOW_CLEAR |
5622                       (rb_bufsz << 1));
5623
5624         if (rdev->wb.enabled)
5625                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5626
5627         /* set the writeback address whether it's enabled or not */
5628         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5629         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5630
5631         WREG32(IH_RB_CNTL, ih_rb_cntl);
5632
5633         /* set rptr, wptr to 0 */
5634         WREG32(IH_RB_RPTR, 0);
5635         WREG32(IH_RB_WPTR, 0);
5636
5637         /* Default settings for IH_CNTL (disabled at first) */
5638         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5639         /* RPTR_REARM only works if msi's are enabled */
5640         if (rdev->msi_enabled)
5641                 ih_cntl |= RPTR_REARM;
5642         WREG32(IH_CNTL, ih_cntl);
5643
5644         /* force the active interrupt state to all disabled */
5645         si_disable_interrupt_state(rdev);
5646
5647         pci_set_master(rdev->pdev);
5648
5649         /* enable irqs */
5650         si_enable_interrupts(rdev);
5651
5652         return ret;
5653 }
5654
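/*
 * Program the interrupt enables (CP rings, DMA engines, vblank, hotplug,
 * thermal) according to the current rdev->irq state.
 */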
5655 int si_irq_set(struct radeon_device *rdev)
5656 {
5657         u32 cp_int_cntl;
5658         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5659         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5660         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5661         u32 grbm_int_cntl = 0;
5662         u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5663         u32 dma_cntl, dma_cntl1;
5664         u32 thermal_int = 0;
5665
5666         if (!rdev->irq.installed) {
5667                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5668                 return -EINVAL;
5669         }
5670         /* don't enable anything if the ih is disabled */
5671         if (!rdev->ih.enabled) {
5672                 si_disable_interrupts(rdev);
5673                 /* force the active interrupt state to all disabled */
5674                 si_disable_interrupt_state(rdev);
5675                 return 0;
5676         }
5677
5678         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
5679                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5680
5681         if (!ASIC_IS_NODCE(rdev)) {
5682                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5683                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5684                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5685                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5686                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5687                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5688         }
5689
5690         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5691         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5692
5693         thermal_int = RREG32(CG_THERMAL_INT) &
5694                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5695
5696         /* enable CP interrupts on all rings */
5697         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5698                 DRM_DEBUG("si_irq_set: sw int gfx\n");
5699                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5700         }
5701         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5702                 DRM_DEBUG("si_irq_set: sw int cp1\n");
5703                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5704         }
5705         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5706                 DRM_DEBUG("si_irq_set: sw int cp2\n");
5707                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5708         }
5709         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5710                 DRM_DEBUG("si_irq_set: sw int dma\n");
5711                 dma_cntl |= TRAP_ENABLE;
5712         }
5713
5714         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5715                 DRM_DEBUG("si_irq_set: sw int dma1\n");
5716                 dma_cntl1 |= TRAP_ENABLE;
5717         }
5718         if (rdev->irq.crtc_vblank_int[0] ||
5719             atomic_read(&rdev->irq.pflip[0])) {
5720                 DRM_DEBUG("si_irq_set: vblank 0\n");
5721                 crtc1 |= VBLANK_INT_MASK;
5722         }
5723         if (rdev->irq.crtc_vblank_int[1] ||
5724             atomic_read(&rdev->irq.pflip[1])) {
5725                 DRM_DEBUG("si_irq_set: vblank 1\n");
5726                 crtc2 |= VBLANK_INT_MASK;
5727         }
5728         if (rdev->irq.crtc_vblank_int[2] ||
5729             atomic_read(&rdev->irq.pflip[2])) {
5730                 DRM_DEBUG("si_irq_set: vblank 2\n");
5731                 crtc3 |= VBLANK_INT_MASK;
5732         }
5733         if (rdev->irq.crtc_vblank_int[3] ||
5734             atomic_read(&rdev->irq.pflip[3])) {
5735                 DRM_DEBUG("si_irq_set: vblank 3\n");
5736                 crtc4 |= VBLANK_INT_MASK;
5737         }
5738         if (rdev->irq.crtc_vblank_int[4] ||
5739             atomic_read(&rdev->irq.pflip[4])) {
5740                 DRM_DEBUG("si_irq_set: vblank 4\n");
5741                 crtc5 |= VBLANK_INT_MASK;
5742         }
5743         if (rdev->irq.crtc_vblank_int[5] ||
5744             atomic_read(&rdev->irq.pflip[5])) {
5745                 DRM_DEBUG("si_irq_set: vblank 5\n");
5746                 crtc6 |= VBLANK_INT_MASK;
5747         }
5748         if (rdev->irq.hpd[0]) {
5749                 DRM_DEBUG("si_irq_set: hpd 1\n");
5750                 hpd1 |= DC_HPDx_INT_EN;
5751         }
5752         if (rdev->irq.hpd[1]) {
5753                 DRM_DEBUG("si_irq_set: hpd 2\n");
5754                 hpd2 |= DC_HPDx_INT_EN;
5755         }
5756         if (rdev->irq.hpd[2]) {
5757                 DRM_DEBUG("si_irq_set: hpd 3\n");
5758                 hpd3 |= DC_HPDx_INT_EN;
5759         }
5760         if (rdev->irq.hpd[3]) {
5761                 DRM_DEBUG("si_irq_set: hpd 4\n");
5762                 hpd4 |= DC_HPDx_INT_EN;
5763         }
5764         if (rdev->irq.hpd[4]) {
5765                 DRM_DEBUG("si_irq_set: hpd 5\n");
5766                 hpd5 |= DC_HPDx_INT_EN;
5767         }
5768         if (rdev->irq.hpd[5]) {
5769                 DRM_DEBUG("si_irq_set: hpd 6\n");
5770                 hpd6 |= DC_HPDx_INT_EN;
5771         }
5772
5773         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5774         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5775         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5776
5777         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5778         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5779
5780         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5781
5782         if (rdev->irq.dpm_thermal) {
5783                 DRM_DEBUG("dpm thermal\n");
5784                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5785         }
5786
5787         if (rdev->num_crtc >= 2) {
5788                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5789                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5790         }
5791         if (rdev->num_crtc >= 4) {
5792                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5793                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5794         }
5795         if (rdev->num_crtc >= 6) {
5796                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5797                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5798         }
5799
5800         if (rdev->num_crtc >= 2) {
5801                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5802                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5803         }
5804         if (rdev->num_crtc >= 4) {
5805                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5806                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5807         }
5808         if (rdev->num_crtc >= 6) {
5809                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5810                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5811         }
5812
5813         if (!ASIC_IS_NODCE(rdev)) {
5814                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5815                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5816                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5817                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5818                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5819                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5820         }
5821
5822         WREG32(CG_THERMAL_INT, thermal_int);
5823
5824         return 0;
5825 }
5826
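/*
 * Latch the display interrupt status registers into rdev->irq.stat_regs and
 * acknowledge any pending pageflip, vblank, vline and hotplug interrupts.
 */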
5827 static inline void si_irq_ack(struct radeon_device *rdev)
5828 {
5829         u32 tmp;
5830
5831         if (ASIC_IS_NODCE(rdev))
5832                 return;
5833
5834         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5835         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5836         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5837         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5838         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5839         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5840         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5841         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5842         if (rdev->num_crtc >= 4) {
5843                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5844                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5845         }
5846         if (rdev->num_crtc >= 6) {
5847                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5848                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5849         }
5850
5851         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5852                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5853         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5854                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5855         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5856                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5857         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5858                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5859         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5860                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5861         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5862                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5863
5864         if (rdev->num_crtc >= 4) {
5865                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5866                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5867                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5868                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5869                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5870                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5871                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5872                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5873                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5874                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5875                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5876                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5877         }
5878
5879         if (rdev->num_crtc >= 6) {
5880                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5881                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5882                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5883                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5884                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5885                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5886                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5887                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5888                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5889                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5890                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5891                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5892         }
5893
5894         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5895                 tmp = RREG32(DC_HPD1_INT_CONTROL);
5896                 tmp |= DC_HPDx_INT_ACK;
5897                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5898         }
5899         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5900                 tmp = RREG32(DC_HPD2_INT_CONTROL);
5901                 tmp |= DC_HPDx_INT_ACK;
5902                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5903         }
5904         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5905                 tmp = RREG32(DC_HPD3_INT_CONTROL);
5906                 tmp |= DC_HPDx_INT_ACK;
5907                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5908         }
5909         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5910                 tmp = RREG32(DC_HPD4_INT_CONTROL);
5911                 tmp |= DC_HPDx_INT_ACK;
5912                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5913         }
5914         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5915                 tmp = RREG32(DC_HPD5_INT_CONTROL);
5916                 tmp |= DC_HPDx_INT_ACK;
5917                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5918         }
5919         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5920                 tmp = RREG32(DC_HPD6_INT_CONTROL);
5921                 tmp |= DC_HPDx_INT_ACK;
5922                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5923         }
5924 }
5925
5926 static void si_irq_disable(struct radeon_device *rdev)
5927 {
5928         si_disable_interrupts(rdev);
5929         /* Wait and acknowledge irq */
5930         mdelay(1);
5931         si_irq_ack(rdev);
5932         si_disable_interrupt_state(rdev);
5933 }
5934
5935 static void si_irq_suspend(struct radeon_device *rdev)
5936 {
5937         si_irq_disable(rdev);
5938         si_rlc_stop(rdev);
5939 }
5940
5941 static void si_irq_fini(struct radeon_device *rdev)
5942 {
5943         si_irq_suspend(rdev);
5944         r600_ih_ring_fini(rdev);
5945 }
5946
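/*
 * Read the IH write pointer (from the write-back buffer when enabled) and
 * handle ring overflow by skipping ahead of the overwritten entries.
 */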
5947 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5948 {
5949         u32 wptr, tmp;
5950
5951         if (rdev->wb.enabled)
5952                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5953         else
5954                 wptr = RREG32(IH_RB_WPTR);
5955
5956         if (wptr & RB_OVERFLOW) {
5957                 /* When a ring buffer overflow happens, start parsing
5958                  * interrupts from the last vector that was not overwritten
5959                  * (wptr + 16). Hopefully this should allow us to catch up.
5960                  */
5961                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5962                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5963                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5964                 tmp = RREG32(IH_RB_CNTL);
5965                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5966                 WREG32(IH_RB_CNTL, tmp);
5967         }
5968         return (wptr & rdev->ih.ptr_mask);
5969 }
5970
5971 /*        SI IV Ring
5972  * Each IV ring entry is 128 bits:
5973  * [7:0]    - interrupt source id
5974  * [31:8]   - reserved
5975  * [59:32]  - interrupt source data
5976  * [63:60]  - reserved
5977  * [71:64]  - RINGID
5978  * [79:72]  - VMID
5979  * [127:80] - reserved
5980  */
5981 int si_irq_process(struct radeon_device *rdev)
5982 {
5983         u32 wptr;
5984         u32 rptr;
5985         u32 src_id, src_data, ring_id;
5986         u32 ring_index;
5987         bool queue_hotplug = false;
5988         bool queue_thermal = false;
5989         u32 status, addr;
5990
5991         if (!rdev->ih.enabled || rdev->shutdown)
5992                 return IRQ_NONE;
5993
5994         wptr = si_get_ih_wptr(rdev);
5995
5996 restart_ih:
5997         /* is somebody else already processing irqs? */
5998         if (atomic_xchg(&rdev->ih.lock, 1))
5999                 return IRQ_NONE;
6000
6001         rptr = rdev->ih.rptr;
6002         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6003
6004         /* Order reading of wptr vs. reading of IH ring data */
6005         rmb();
6006
6007         /* display interrupts */
6008         si_irq_ack(rdev);
6009
6010         while (rptr != wptr) {
6011                 /* wptr/rptr are in bytes! */
6012                 ring_index = rptr / 4;
6013                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6014                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6015                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6016
6017                 switch (src_id) {
6018                 case 1: /* D1 vblank/vline */
6019                         switch (src_data) {
6020                         case 0: /* D1 vblank */
6021                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6022                                         if (rdev->irq.crtc_vblank_int[0]) {
6023                                                 drm_handle_vblank(rdev->ddev, 0);
6024                                                 rdev->pm.vblank_sync = true;
6025                                                 wake_up(&rdev->irq.vblank_queue);
6026                                         }
6027                                         if (atomic_read(&rdev->irq.pflip[0]))
6028                                                 radeon_crtc_handle_flip(rdev, 0);
6029                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6030                                         DRM_DEBUG("IH: D1 vblank\n");
6031                                 }
6032                                 break;
6033                         case 1: /* D1 vline */
6034                                 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6035                                         rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6036                                         DRM_DEBUG("IH: D1 vline\n");
6037                                 }
6038                                 break;
6039                         default:
6040                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6041                                 break;
6042                         }
6043                         break;
6044                 case 2: /* D2 vblank/vline */
6045                         switch (src_data) {
6046                         case 0: /* D2 vblank */
6047                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6048                                         if (rdev->irq.crtc_vblank_int[1]) {
6049                                                 drm_handle_vblank(rdev->ddev, 1);
6050                                                 rdev->pm.vblank_sync = true;
6051                                                 wake_up(&rdev->irq.vblank_queue);
6052                                         }
6053                                         if (atomic_read(&rdev->irq.pflip[1]))
6054                                                 radeon_crtc_handle_flip(rdev, 1);
6055                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6056                                         DRM_DEBUG("IH: D2 vblank\n");
6057                                 }
6058                                 break;
6059                         case 1: /* D2 vline */
6060                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6061                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6062                                         DRM_DEBUG("IH: D2 vline\n");
6063                                 }
6064                                 break;
6065                         default:
6066                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6067                                 break;
6068                         }
6069                         break;
6070                 case 3: /* D3 vblank/vline */
6071                         switch (src_data) {
6072                         case 0: /* D3 vblank */
6073                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6074                                         if (rdev->irq.crtc_vblank_int[2]) {
6075                                                 drm_handle_vblank(rdev->ddev, 2);
6076                                                 rdev->pm.vblank_sync = true;
6077                                                 wake_up(&rdev->irq.vblank_queue);
6078                                         }
6079                                         if (atomic_read(&rdev->irq.pflip[2]))
6080                                                 radeon_crtc_handle_flip(rdev, 2);
6081                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6082                                         DRM_DEBUG("IH: D3 vblank\n");
6083                                 }
6084                                 break;
6085                         case 1: /* D3 vline */
6086                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6087                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6088                                         DRM_DEBUG("IH: D3 vline\n");
6089                                 }
6090                                 break;
6091                         default:
6092                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6093                                 break;
6094                         }
6095                         break;
6096                 case 4: /* D4 vblank/vline */
6097                         switch (src_data) {
6098                         case 0: /* D4 vblank */
6099                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6100                                         if (rdev->irq.crtc_vblank_int[3]) {
6101                                                 drm_handle_vblank(rdev->ddev, 3);
6102                                                 rdev->pm.vblank_sync = true;
6103                                                 wake_up(&rdev->irq.vblank_queue);
6104                                         }
6105                                         if (atomic_read(&rdev->irq.pflip[3]))
6106                                                 radeon_crtc_handle_flip(rdev, 3);
6107                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6108                                         DRM_DEBUG("IH: D4 vblank\n");
6109                                 }
6110                                 break;
6111                         case 1: /* D4 vline */
6112                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6113                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6114                                         DRM_DEBUG("IH: D4 vline\n");
6115                                 }
6116                                 break;
6117                         default:
6118                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6119                                 break;
6120                         }
6121                         break;
6122                 case 5: /* D5 vblank/vline */
6123                         switch (src_data) {
6124                         case 0: /* D5 vblank */
6125                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6126                                         if (rdev->irq.crtc_vblank_int[4]) {
6127                                                 drm_handle_vblank(rdev->ddev, 4);
6128                                                 rdev->pm.vblank_sync = true;
6129                                                 wake_up(&rdev->irq.vblank_queue);
6130                                         }
6131                                         if (atomic_read(&rdev->irq.pflip[4]))
6132                                                 radeon_crtc_handle_flip(rdev, 4);
6133                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6134                                         DRM_DEBUG("IH: D5 vblank\n");
6135                                 }
6136                                 break;
6137                         case 1: /* D5 vline */
6138                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6139                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6140                                         DRM_DEBUG("IH: D5 vline\n");
6141                                 }
6142                                 break;
6143                         default:
6144                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6145                                 break;
6146                         }
6147                         break;
6148                 case 6: /* D6 vblank/vline */
6149                         switch (src_data) {
6150                         case 0: /* D6 vblank */
6151                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6152                                         if (rdev->irq.crtc_vblank_int[5]) {
6153                                                 drm_handle_vblank(rdev->ddev, 5);
6154                                                 rdev->pm.vblank_sync = true;
6155                                                 wake_up(&rdev->irq.vblank_queue);
6156                                         }
6157                                         if (atomic_read(&rdev->irq.pflip[5]))
6158                                                 radeon_crtc_handle_flip(rdev, 5);
6159                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6160                                         DRM_DEBUG("IH: D6 vblank\n");
6161                                 }
6162                                 break;
6163                         case 1: /* D6 vline */
6164                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6165                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6166                                         DRM_DEBUG("IH: D6 vline\n");
6167                                 }
6168                                 break;
6169                         default:
6170                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6171                                 break;
6172                         }
6173                         break;
6174                 case 42: /* HPD hotplug */
6175                         switch (src_data) {
6176                         case 0:
6177                                 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6178                                         rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6179                                         queue_hotplug = true;
6180                                         DRM_DEBUG("IH: HPD1\n");
6181                                 }
6182                                 break;
6183                         case 1:
6184                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6185                                         rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6186                                         queue_hotplug = true;
6187                                         DRM_DEBUG("IH: HPD2\n");
6188                                 }
6189                                 break;
6190                         case 2:
6191                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6192                                         rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6193                                         queue_hotplug = true;
6194                                         DRM_DEBUG("IH: HPD3\n");
6195                                 }
6196                                 break;
6197                         case 3:
6198                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6199                                         rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6200                                         queue_hotplug = true;
6201                                         DRM_DEBUG("IH: HPD4\n");
6202                                 }
6203                                 break;
6204                         case 4:
6205                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6206                                         rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6207                                         queue_hotplug = true;
6208                                         DRM_DEBUG("IH: HPD5\n");
6209                                 }
6210                                 break;
6211                         case 5:
6212                                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6213                                         rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6214                                         queue_hotplug = true;
6215                                         DRM_DEBUG("IH: HPD6\n");
6216                                 }
6217                                 break;
6218                         default:
6219                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6220                                 break;
6221                         }
6222                         break;
6223                 case 146:
6224                 case 147:
6225                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6226                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6227                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6228                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6229                                 addr);
6230                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6231                                 status);
6232                         si_vm_decode_fault(rdev, status, addr);
6233                         /* reset addr and status */
6234                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6235                         break;
6236                 case 176: /* RINGID0 CP_INT */
6237                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6238                         break;
6239                 case 177: /* RINGID1 CP_INT */
6240                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6241                         break;
6242                 case 178: /* RINGID2 CP_INT */
6243                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6244                         break;
6245                 case 181: /* CP EOP event */
6246                         DRM_DEBUG("IH: CP EOP\n");
6247                         switch (ring_id) {
6248                         case 0:
6249                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6250                                 break;
6251                         case 1:
6252                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6253                                 break;
6254                         case 2:
6255                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6256                                 break;
6257                         }
6258                         break;
6259                 case 224: /* DMA trap event */
6260                         DRM_DEBUG("IH: DMA trap\n");
6261                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6262                         break;
6263                 case 230: /* thermal low to high */
6264                         DRM_DEBUG("IH: thermal low to high\n");
6265                         rdev->pm.dpm.thermal.high_to_low = false;
6266                         queue_thermal = true;
6267                         break;
6268                 case 231: /* thermal high to low */
6269                         DRM_DEBUG("IH: thermal high to low\n");
6270                         rdev->pm.dpm.thermal.high_to_low = true;
6271                         queue_thermal = true;
6272                         break;
6273                 case 233: /* GUI IDLE */
6274                         DRM_DEBUG("IH: GUI idle\n");
6275                         break;
6276                 case 244: /* DMA1 trap event */
6277                         DRM_DEBUG("IH: DMA1 trap\n");
6278                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6279                         break;
6280                 default:
6281                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6282                         break;
6283                 }
6284
6285                 /* wptr/rptr are in bytes! */
6286                 rptr += 16;
6287                 rptr &= rdev->ih.ptr_mask;
6288         }
6289         if (queue_hotplug)
6290                 schedule_work(&rdev->hotplug_work);
6291         if (queue_thermal && rdev->pm.dpm_enabled)
6292                 schedule_work(&rdev->pm.dpm.thermal.work);
6293         rdev->ih.rptr = rptr;
6294         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6295         atomic_set(&rdev->ih.lock, 0);
6296
6297         /* make sure wptr hasn't changed while processing */
6298         wptr = si_get_ih_wptr(rdev);
6299         if (wptr != rptr)
6300                 goto restart_ih;
6301
6302         return IRQ_HANDLED;
6303 }
6304
6305 /*
6306  * startup/shutdown callbacks
6307  */
6308 static int si_startup(struct radeon_device *rdev)
6309 {
6310         struct radeon_ring *ring;
6311         int r;
6312
6313         /* enable pcie gen2/3 link */
6314         si_pcie_gen3_enable(rdev);
6315         /* enable aspm */
6316         si_program_aspm(rdev);
6317
6318         /* scratch needs to be initialized before MC */
6319         r = r600_vram_scratch_init(rdev);
6320         if (r)
6321                 return r;
6322
6323         si_mc_program(rdev);
6324
6325         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6326             !rdev->rlc_fw || !rdev->mc_fw) {
6327                 r = si_init_microcode(rdev);
6328                 if (r) {
6329                         DRM_ERROR("Failed to load firmware!\n");
6330                         return r;
6331                 }
6332         }
6333
6334         r = si_mc_load_microcode(rdev);
6335         if (r) {
6336                 DRM_ERROR("Failed to load MC firmware!\n");
6337                 return r;
6338         }
6339
6340         r = si_pcie_gart_enable(rdev);
6341         if (r)
6342                 return r;
6343         si_gpu_init(rdev);
6344
6345         /* allocate rlc buffers */
6346         if (rdev->family == CHIP_VERDE) {
6347                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6348                 rdev->rlc.reg_list_size =
6349                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6350         }
6351         rdev->rlc.cs_data = si_cs_data;
6352         r = sumo_rlc_init(rdev);
6353         if (r) {
6354                 DRM_ERROR("Failed to init rlc BOs!\n");
6355                 return r;
6356         }
6357
6358         /* allocate wb buffer */
6359         r = radeon_wb_init(rdev);
6360         if (r)
6361                 return r;
6362
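             /* start fence processing on the GFX ring, both compute CP rings,
              * and both DMA engines before the rings themselves are set up
              */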
6363         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6364         if (r) {
6365                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6366                 return r;
6367         }
6368
6369         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6370         if (r) {
6371                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6372                 return r;
6373         }
6374
6375         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6376         if (r) {
6377                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6378                 return r;
6379         }
6380
6381         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6382         if (r) {
6383                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6384                 return r;
6385         }
6386
6387         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6388         if (r) {
6389                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6390                 return r;
6391         }
6392
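             /* resume UVD and start its fence ring; if anything fails, just
              * disable the UVD ring rather than failing the whole startup
              */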
6393         if (rdev->has_uvd) {
6394                 r = uvd_v2_2_resume(rdev);
6395                 if (!r) {
6396                         r = radeon_fence_driver_start_ring(rdev,
6397                                                            R600_RING_TYPE_UVD_INDEX);
6398                         if (r)
6399                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6400                 }
6401                 if (r)
6402                         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6403         }
6404
6405         /* Enable IRQ */
6406         if (!rdev->irq.installed) {
6407                 r = radeon_irq_kms_init(rdev);
6408                 if (r)
6409                         return r;
6410         }
6411
6412         r = si_irq_init(rdev);
6413         if (r) {
6414                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6415                 radeon_irq_kms_fini(rdev);
6416                 return r;
6417         }
6418         si_irq_set(rdev);
6419
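             /* bring up the CP and DMA rings; each gets its write-back rptr
              * offset, its rptr/wptr registers, and a nop packet value
              */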
6420         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6421         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6422                              CP_RB0_RPTR, CP_RB0_WPTR,
6423                              RADEON_CP_PACKET2);
6424         if (r)
6425                 return r;
6426
6427         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6428         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6429                              CP_RB1_RPTR, CP_RB1_WPTR,
6430                              RADEON_CP_PACKET2);
6431         if (r)
6432                 return r;
6433
6434         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6435         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6436                              CP_RB2_RPTR, CP_RB2_WPTR,
6437                              RADEON_CP_PACKET2);
6438         if (r)
6439                 return r;
6440
6441         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6442         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6443                              DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6444                              DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6445                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6446         if (r)
6447                 return r;
6448
6449         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6450         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6451                              DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6452                              DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6453                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6454         if (r)
6455                 return r;
6456
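             /* load the CP microcode, then start the CP and DMA engines */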
6457         r = si_cp_load_microcode(rdev);
6458         if (r)
6459                 return r;
6460         r = si_cp_resume(rdev);
6461         if (r)
6462                 return r;
6463
6464         r = cayman_dma_resume(rdev);
6465         if (r)
6466                 return r;
6467
6468         if (rdev->has_uvd) {
6469                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6470                 if (ring->ring_size) {
6471                         r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
6472                                              UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6473                                              RADEON_CP_PACKET2);
6474                         if (!r)
6475                                 r = uvd_v1_0_init(rdev);
6476                         if (r)
6477                                 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6478                 }
6479         }
6480
6481         r = radeon_ib_pool_init(rdev);
6482         if (r) {
6483                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6484                 return r;
6485         }
6486
6487         r = radeon_vm_manager_init(rdev);
6488         if (r) {
6489                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6490                 return r;
6491         }
6492
6493         r = dce6_audio_init(rdev);
6494         if (r)
6495                 return r;
6496
6497         return 0;
6498 }
6499
6500 int si_resume(struct radeon_device *rdev)
6501 {
6502         int r;
6503
6504         /* Do not reset the GPU before posting; on this hardware, unlike
6505          * on r5xx, posting performs the tasks needed to bring the GPU
6506          * back into good shape.
6507          */
6508         /* post card */
6509         atom_asic_init(rdev->mode_info.atom_context);
6510
6511         /* init golden registers */
6512         si_init_golden_registers(rdev);
6513
6514         rdev->accel_working = true;
6515         r = si_startup(rdev);
6516         if (r) {
6517                 DRM_ERROR("si startup failed on resume\n");
6518                 rdev->accel_working = false;
6519                 return r;
6520         }
6521
6522         return r;
6523
6524 }
6525
6526 int si_suspend(struct radeon_device *rdev)
6527 {
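         /* stop the engines and tear things down in roughly the reverse of
          * the startup order
          */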
6528         dce6_audio_fini(rdev);
6529         radeon_vm_manager_fini(rdev);
6530         si_cp_enable(rdev, false);
6531         cayman_dma_stop(rdev);
6532         if (rdev->has_uvd) {
6533                 uvd_v1_0_fini(rdev);
6534                 radeon_uvd_suspend(rdev);
6535         }
6536         si_fini_pg(rdev);
6537         si_fini_cg(rdev);
6538         si_irq_suspend(rdev);
6539         radeon_wb_disable(rdev);
6540         si_pcie_gart_disable(rdev);
6541         return 0;
6542 }
6543
6544 /* The plan is to move initialization into this function and use
6545  * helper functions so that radeon_device_init does little more
6546  * than call ASIC-specific functions. This should also make it
6547  * possible to remove a number of callbacks such as
6548  * vram_info.
6549  */
6550 int si_init(struct radeon_device *rdev)
6551 {
6552         struct radeon_ring *ring;
6553         int r;
6554
6555         /* Read BIOS */
6556         if (!radeon_get_bios(rdev)) {
6557                 if (ASIC_IS_AVIVO(rdev))
6558                         return -EINVAL;
6559         }
6560         /* Must be an ATOMBIOS */
6561         if (!rdev->is_atom_bios) {
6562                 dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6563                 return -EINVAL;
6564         }
6565         r = radeon_atombios_init(rdev);
6566         if (r)
6567                 return r;
6568
6569         /* Post card if necessary */
6570         if (!radeon_card_posted(rdev)) {
6571                 if (!rdev->bios) {
6572                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6573                         return -EINVAL;
6574                 }
6575                 DRM_INFO("GPU not posted. Posting now...\n");
6576                 atom_asic_init(rdev->mode_info.atom_context);
6577         }
6578         /* init golden registers */
6579         si_init_golden_registers(rdev);
6580         /* Initialize scratch registers */
6581         si_scratch_init(rdev);
6582         /* Initialize surface registers */
6583         radeon_surface_init(rdev);
6584         /* Initialize clocks */
6585         radeon_get_clock_info(rdev->ddev);
6586
6587         /* Fence driver */
6588         r = radeon_fence_driver_init(rdev);
6589         if (r)
6590                 return r;
6591
6592         /* initialize memory controller */
6593         r = si_mc_init(rdev);
6594         if (r)
6595                 return r;
6596         /* Memory manager */
6597         r = radeon_bo_init(rdev);
6598         if (r)
6599                 return r;
6600
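             /* set up the ring buffers: 1MB for the GFX and compute CP rings,
              * 64KB for each DMA ring
              */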
6601         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6602         ring->ring_obj = NULL;
6603         r600_ring_init(rdev, ring, 1024 * 1024);
6604
6605         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6606         ring->ring_obj = NULL;
6607         r600_ring_init(rdev, ring, 1024 * 1024);
6608
6609         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6610         ring->ring_obj = NULL;
6611         r600_ring_init(rdev, ring, 1024 * 1024);
6612
6613         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6614         ring->ring_obj = NULL;
6615         r600_ring_init(rdev, ring, 64 * 1024);
6616
6617         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6618         ring->ring_obj = NULL;
6619         r600_ring_init(rdev, ring, 64 * 1024);
6620
6621         if (rdev->has_uvd) {
6622                 r = radeon_uvd_init(rdev);
6623                 if (!r) {
6624                         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6625                         ring->ring_obj = NULL;
6626                         r600_ring_init(rdev, ring, 4096);
6627                 }
6628         }
6629
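             /* 64KB ring for the interrupt handler (IH) */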
6630         rdev->ih.ring_obj = NULL;
6631         r600_ih_ring_init(rdev, 64 * 1024);
6632
6633         r = r600_pcie_gart_init(rdev);
6634         if (r)
6635                 return r;
6636
6637         rdev->accel_working = true;
6638         r = si_startup(rdev);
6639         if (r) {
6640                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6641                 si_cp_fini(rdev);
6642                 cayman_dma_fini(rdev);
6643                 si_irq_fini(rdev);
6644                 sumo_rlc_fini(rdev);
6645                 radeon_wb_fini(rdev);
6646                 radeon_ib_pool_fini(rdev);
6647                 radeon_vm_manager_fini(rdev);
6648                 radeon_irq_kms_fini(rdev);
6649                 si_pcie_gart_fini(rdev);
6650                 rdev->accel_working = false;
6651         }
6652
6653         /* Don't start up if the MC ucode is missing.
6654          * The default clocks and voltages before the MC ucode
6655          * is loaded are not sufficient for advanced operations.
6656          */
6657         if (!rdev->mc_fw) {
6658                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6659                 return -EINVAL;
6660         }
6661
6662         return 0;
6663 }
6664
6665 void si_fini(struct radeon_device *rdev)
6666 {
6667         si_cp_fini(rdev);
6668         cayman_dma_fini(rdev);
6669         si_fini_pg(rdev);
6670         si_fini_cg(rdev);
6671         si_irq_fini(rdev);
6672         sumo_rlc_fini(rdev);
6673         radeon_wb_fini(rdev);
6674         radeon_vm_manager_fini(rdev);
6675         radeon_ib_pool_fini(rdev);
6676         radeon_irq_kms_fini(rdev);
6677         if (rdev->has_uvd) {
6678                 uvd_v1_0_fini(rdev);
6679                 radeon_uvd_fini(rdev);
6680         }
6681         si_pcie_gart_fini(rdev);
6682         r600_vram_scratch_fini(rdev);
6683         radeon_gem_fini(rdev);
6684         radeon_fence_driver_fini(rdev);
6685         radeon_bo_fini(rdev);
6686         radeon_atombios_fini(rdev);
6687         kfree(rdev->bios);
6688         rdev->bios = NULL;
6689 }
6690
6691 /**
6692  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6693  *
6694  * @rdev: radeon_device pointer
6695  *
6696  * Fetches a GPU clock counter snapshot (SI).
6697  * Returns the 64-bit clock counter snapshot.
6698  */
6699 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6700 {
6701         uint64_t clock;
6702
6703         mutex_lock(&rdev->gpu_clock_mutex);
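             /* writing 1 captures the current counter value so the LSB/MSB
              * reads below return one consistent snapshot
              */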
6704         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6705         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6706                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6707         mutex_unlock(&rdev->gpu_clock_mutex);
6708         return clock;
6709 }
6710
6711 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6712 {
6713         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6714         int r;
6715
6716         /* bypass vclk and dclk with bclk */
6717         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6718                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6719                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6720
6721         /* put PLL in bypass mode */
6722         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6723
6724         if (!vclk || !dclk) {
6725                 /* keep the bypass mode and put the PLL to sleep */
6726                 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6727                 return 0;
6728         }
6729
6730         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6731                                           16384, 0x03FFFFFF, 0, 128, 5,
6732                                           &fb_div, &vclk_div, &dclk_div);
6733         if (r)
6734                 return r;
6735
6736         /* set RESET_ANTI_MUX to 0 */
6737         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6738
6739         /* set VCO_MODE to 1 */
6740         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6741
6742         /* toggle UPLL_SLEEP to 1 then back to 0 */
6743         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6744         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6745
6746         /* deassert UPLL_RESET */
6747         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6748
6749         mdelay(1);
6750
6751         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6752         if (r)
6753                 return r;
6754
6755         /* assert UPLL_RESET again */
6756         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6757
6758         /* disable spread spectrum. */
6759         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6760
6761         /* set feedback divider */
6762         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6763
6764         /* set ref divider to 0 */
6765         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6766
6767         if (fb_div < 307200)
6768                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6769         else
6770                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6771
6772         /* set PDIV_A and PDIV_B */
6773         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6774                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6775                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6776
6777         /* give the PLL some time to settle */
6778         mdelay(15);
6779
6780         /* deassert PLL_RESET */
6781         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6782
6783         mdelay(15);
6784
6785         /* switch from bypass mode to normal mode */
6786         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6787
6788         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6789         if (r)
6790                 return r;
6791
6792         /* switch VCLK and DCLK selection */
6793         WREG32_P(CG_UPLL_FUNC_CNTL_2,
6794                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6795                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6796
6797         mdelay(100);
6798
6799         return 0;
6800 }
6801
6802 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6803 {
6804         struct pci_dev *root = rdev->pdev->bus->self;
6805         int bridge_pos, gpu_pos;
6806         u32 speed_cntl, mask, current_data_rate;
6807         int ret, i;
6808         u16 tmp16;
6809
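             /* honor the radeon.pcie_gen2 module parameter and skip
              * integrated (IGP) and non-PCIE parts
              */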
6810         if (radeon_pcie_gen2 == 0)
6811                 return;
6812
6813         if (rdev->flags & RADEON_IS_IGP)
6814                 return;
6815
6816         if (!(rdev->flags & RADEON_IS_PCIE))
6817                 return;
6818
6819         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6820         if (ret != 0)
6821                 return;
6822
6823         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6824                 return;
6825
6826         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6827         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6828                 LC_CURRENT_DATA_RATE_SHIFT;
6829         if (mask & DRM_PCIE_SPEED_80) {
6830                 if (current_data_rate == 2) {
6831                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6832                         return;
6833                 }
6834                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6835         } else if (mask & DRM_PCIE_SPEED_50) {
6836                 if (current_data_rate == 1) {
6837                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6838                         return;
6839                 }
6840                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6841         }
6842
6843         bridge_pos = pci_pcie_cap(root);
6844         if (!bridge_pos)
6845                 return;
6846
6847         gpu_pos = pci_pcie_cap(rdev->pdev);
6848         if (!gpu_pos)
6849                 return;
6850
6851         if (mask & DRM_PCIE_SPEED_80) {
6852                 /* re-try equalization if gen3 is not already enabled */
6853                 if (current_data_rate != 2) {
6854                         u16 bridge_cfg, gpu_cfg;
6855                         u16 bridge_cfg2, gpu_cfg2;
6856                         u32 max_lw, current_lw, tmp;
6857
6858                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6859                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6860
6861                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6862                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6863
6864                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
6865                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6866
6867                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
6868                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
6869                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
6870
6871                         if (current_lw < max_lw) {
6872                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
6873                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
6874                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
6875                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
6876                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
6877                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
6878                                 }
6879                         }
6880
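                             /* retry link equalization up to 10 times, preserving
                              * the bridge and GPU link control settings across
                              * each attempt
                              */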
6881                         for (i = 0; i < 10; i++) {
6882                                 /* check status */
6883                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
6884                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
6885                                         break;
6886
6887                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6888                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6889
6890                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
6891                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
6892
6893                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6894                                 tmp |= LC_SET_QUIESCE;
6895                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6896
6897                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6898                                 tmp |= LC_REDO_EQ;
6899                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6900
6901                                 mdelay(100);
6902
6903                                 /* linkctl */
6904                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
6905                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6906                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
6907                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6908
6909                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
6910                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
6911                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
6912                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
6913
6914                                 /* linkctl2 */
6915                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
6916                                 tmp16 &= ~((1 << 4) | (7 << 9));
6917                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
6918                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
6919
6920                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6921                                 tmp16 &= ~((1 << 4) | (7 << 9));
6922                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
6923                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6924
6925                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
6926                                 tmp &= ~LC_SET_QUIESCE;
6927                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
6928                         }
6929                 }
6930         }
6931
6932         /* set the link speed */
6933         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
6934         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
6935         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6936
6937         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
6938         tmp16 &= ~0xf;
6939         if (mask & DRM_PCIE_SPEED_80)
6940                 tmp16 |= 3; /* gen3 */
6941         else if (mask & DRM_PCIE_SPEED_50)
6942                 tmp16 |= 2; /* gen2 */
6943         else
6944                 tmp16 |= 1; /* gen1 */
6945         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
6946
6947         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6948         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
6949         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
6950
6951         for (i = 0; i < rdev->usec_timeout; i++) {
6952                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6953                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
6954                         break;
6955                 udelay(1);
6956         }
6957 }
6958
6959 static void si_program_aspm(struct radeon_device *rdev)
6960 {
6961         u32 data, orig;
6962         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6963         bool disable_clkreq = false;
6964
6965         if (radeon_aspm == 0)
6966                 return;
6967
6968         if (!(rdev->flags & RADEON_IS_PCIE))
6969                 return;
6970
6971         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
6972         data &= ~LC_XMIT_N_FTS_MASK;
6973         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
6974         if (orig != data)
6975                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
6976
6977         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
6978         data |= LC_GO_TO_RECOVERY;
6979         if (orig != data)
6980                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
6981
6982         orig = data = RREG32_PCIE(PCIE_P_CNTL);
6983         data |= P_IGNORE_EDB_ERR;
6984         if (orig != data)
6985                 WREG32_PCIE(PCIE_P_CNTL, data);
6986
6987         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
6988         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
6989         data |= LC_PMI_TO_L1_DIS;
6990         if (!disable_l0s)
6991                 data |= LC_L0S_INACTIVITY(7);
6992
6993         if (!disable_l1) {
6994                 data |= LC_L1_INACTIVITY(7);
6995                 data &= ~LC_PMI_TO_L1_DIS;
6996                 if (orig != data)
6997                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
6998
6999                 if (!disable_plloff_in_l1) {
7000                         bool clk_req_support;
7001
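                             /* set the PLL power state for the OFF and TXS2 link
                              * states on both PCIE PHYs; this is what allows PLL
                              * power-down in L1
                              */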
7002                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7003                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7004                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7005                         if (orig != data)
7006                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7007
7008                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7009                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7010                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7011                         if (orig != data)
7012                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7013
7014                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7015                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7016                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7017                         if (orig != data)
7018                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7019
7020                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7021                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7022                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7023                         if (orig != data)
7024                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7025
7026                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7027                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7028                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7029                                 if (orig != data)
7030                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7031
7032                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7033                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7034                                 if (orig != data)
7035                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7036
7037                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7038                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7039                                 if (orig != data)
7040                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7041
7042                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7043                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7044                                 if (orig != data)
7045                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7046
7047                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7048                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7049                                 if (orig != data)
7050                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7051
7052                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7053                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7054                                 if (orig != data)
7055                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7056
7057                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7058                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7059                                 if (orig != data)
7060                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7061
7062                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7063                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7064                                 if (orig != data)
7065                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7066                         }
7067                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7068                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7069                         data |= LC_DYN_LANES_PWR_STATE(3);
7070                         if (orig != data)
7071                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7072
7073                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7074                         data &= ~LS2_EXIT_TIME_MASK;
7075                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7076                                 data |= LS2_EXIT_TIME(5);
7077                         if (orig != data)
7078                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7079
7080                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7081                         data &= ~LS2_EXIT_TIME_MASK;
7082                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7083                                 data |= LS2_EXIT_TIME(5);
7084                         if (orig != data)
7085                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7086
7087                         if (!disable_clkreq) {
7088                                 struct pci_dev *root = rdev->pdev->bus->self;
7089                                 u32 lnkcap;
7090
7091                                 clk_req_support = false;
7092                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7093                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7094                                         clk_req_support = true;
7095                         } else {
7096                                 clk_req_support = false;
7097                         }
7098
7099                         if (clk_req_support) {
7100                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7101                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7102                                 if (orig != data)
7103                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7104
7105                                 orig = data = RREG32(THM_CLK_CNTL);
7106                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7107                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7108                                 if (orig != data)
7109                                         WREG32(THM_CLK_CNTL, data);
7110
7111                                 orig = data = RREG32(MISC_CLK_CNTL);
7112                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7113                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7114                                 if (orig != data)
7115                                         WREG32(MISC_CLK_CNTL, data);
7116
7117                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7118                                 data &= ~BCLK_AS_XCLK;
7119                                 if (orig != data)
7120                                         WREG32(CG_CLKPIN_CNTL, data);
7121
7122                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7123                                 data &= ~FORCE_BIF_REFCLK_EN;
7124                                 if (orig != data)
7125                                         WREG32(CG_CLKPIN_CNTL_2, data);
7126
7127                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7128                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7129                                 data |= MPLL_CLKOUT_SEL(4);
7130                                 if (orig != data)
7131                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7132
7133                                 orig = data = RREG32(SPLL_CNTL_MODE);
7134                                 data &= ~SPLL_REFCLK_SEL_MASK;
7135                                 if (orig != data)
7136                                         WREG32(SPLL_CNTL_MODE, data);
7137                         }
7138                 }
7139         } else {
7140                 if (orig != data)
7141                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7142         }
7143
7144         orig = data = RREG32_PCIE(PCIE_CNTL2);
7145         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7146         if (orig != data)
7147                 WREG32_PCIE(PCIE_CNTL2, data);
7148
7149         if (!disable_l0s) {
7150                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7151                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7152                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7153                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7154                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7155                                 data &= ~LC_L0S_INACTIVITY_MASK;
7156                                 if (orig != data)
7157                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7158                         }
7159                 }
7160         }
7161 }