linux-drm-fsl-dcu.git: drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64
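/*
 * The MODULE_FIRMWARE() lines above only advertise the required blobs to
 * userspace tooling (modinfo, initramfs generators); the driver actually
 * fetches them at init time with request_firmware().  A minimal sketch of
 * that pattern follows -- the helper name and buffer size are hypothetical,
 * and the real per-ASIC loading is done later in this file (see
 * cik_init_microcode()):
 */
#if 0
static int cik_request_one_fw_sketch(struct radeon_device *rdev,
                                     const struct firmware **fw,
                                     const char *chip_name,
                                     const char *suffix)
{
        char fw_name[32];

        /* e.g. "radeon/BONAIRE_pfp.bin" */
        snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin",
                 chip_name, suffix);
        /* may sleep; returns 0 on success, -errno on failure */
        return request_firmware(fw, fw_name, rdev->dev);
}
#endif
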
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86                                           bool enable);
87
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91         u32 temp;
92         int actual_temp = 0;
93
94         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95                 CTF_TEMP_SHIFT;
96
97         if (temp & 0x200)
98                 actual_temp = 255;
99         else
100                 actual_temp = temp & 0x1ff;
101
102         actual_temp = actual_temp * 1000;
103
104         return actual_temp;
105 }
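
/*
 * Worked example for ci_get_temp(): a raw CTF_TEMP field of 0x05a (90)
 * has bit 9 clear, so it is taken as-is (90 & 0x1ff = 90) and returned
 * as 90 * 1000 = 90000 millidegrees; any reading with bit 9 set is
 * reported as the 255 degree ceiling (255000 millidegrees).
 */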
106
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110         u32 temp;
111         int actual_temp = 0;
112
113         temp = RREG32_SMC(0xC0300E0C);
114
115         if (temp)
116                 actual_temp = (temp / 8) - 49;
117         else
118                 actual_temp = 0;
119
120         actual_temp = actual_temp * 1000;
121
122         return actual_temp;
123 }
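
/*
 * Worked example for kv_get_temp(): the raw SMC reading is in units of
 * 0.125 degrees with a 49 degree offset, so a register value of 456
 * yields (456 / 8) - 49 = 8 degrees C, returned as 8000 millidegrees;
 * a zero raw reading is simply reported as 0.
 */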
124
125 /*
126  * Indirect register accessors (PCIE port space)
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130         unsigned long flags;
131         u32 r;
132
133         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134         WREG32(PCIE_INDEX, reg);
135         (void)RREG32(PCIE_INDEX);
136         r = RREG32(PCIE_DATA);
137         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138         return r;
139 }
140
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143         unsigned long flags;
144
145         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146         WREG32(PCIE_INDEX, reg);
147         (void)RREG32(PCIE_INDEX);
148         WREG32(PCIE_DATA, v);
149         (void)RREG32(PCIE_DATA);
150         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
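
/*
 * The two accessors above implement the usual index/data pattern for the
 * PCIE port register aperture: write the register offset to PCIE_INDEX,
 * read it back to post the write, then move the payload through
 * PCIE_DATA, all under pciep_idx_lock so concurrent accesses cannot
 * interleave.  A minimal sketch of how a caller could build a masked
 * read-modify-write on top of them (hypothetical helper, illustration
 * only):
 */
#if 0
static void cik_pciep_rmw_sketch(struct radeon_device *rdev,
                                 u32 reg, u32 mask, u32 val)
{
        u32 tmp;

        tmp = cik_pciep_rreg(rdev, reg);        /* latch index, fetch data */
        tmp &= ~mask;                           /* clear the field */
        tmp |= (val & mask);                    /* merge the new bits */
        cik_pciep_wreg(rdev, reg, tmp);         /* write back via index/data */
}
#endif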
152
153 static const u32 spectre_rlc_save_restore_register_list[] =
154 {
155         (0x0e00 << 16) | (0xc12c >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc140 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc150 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc15c >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc168 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc170 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc178 >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc204 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0xc2b4 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0xc2b8 >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0xc2bc >> 2),
176         0x00000000,
177         (0x0e00 << 16) | (0xc2c0 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x8228 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x829c >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0x869c >> 2),
184         0x00000000,
185         (0x0600 << 16) | (0x98f4 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x98f8 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x9900 >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0xc260 >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x90e8 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0x3c000 >> 2),
196         0x00000000,
197         (0x0e00 << 16) | (0x3c00c >> 2),
198         0x00000000,
199         (0x0e00 << 16) | (0x8c1c >> 2),
200         0x00000000,
201         (0x0e00 << 16) | (0x9700 >> 2),
202         0x00000000,
203         (0x0e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x4e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x5e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0x6e00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0x7e00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x8e00 << 16) | (0xcd20 >> 2),
214         0x00000000,
215         (0x9e00 << 16) | (0xcd20 >> 2),
216         0x00000000,
217         (0xae00 << 16) | (0xcd20 >> 2),
218         0x00000000,
219         (0xbe00 << 16) | (0xcd20 >> 2),
220         0x00000000,
221         (0x0e00 << 16) | (0x89bc >> 2),
222         0x00000000,
223         (0x0e00 << 16) | (0x8900 >> 2),
224         0x00000000,
225         0x3,
226         (0x0e00 << 16) | (0xc130 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc134 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc1fc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc208 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc264 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc268 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc26c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc270 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc274 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc278 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc27c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc280 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc284 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc288 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc28c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc290 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc294 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc298 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc29c >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2a0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2a4 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0xc2a8 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0xc2ac >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0xc2b0 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x301d0 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x30238 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x30250 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0x30254 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x30258 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3025c >> 2),
285         0x00000000,
286         (0x4e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x5e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0x6e00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0x7e00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x8e00 << 16) | (0xc900 >> 2),
295         0x00000000,
296         (0x9e00 << 16) | (0xc900 >> 2),
297         0x00000000,
298         (0xae00 << 16) | (0xc900 >> 2),
299         0x00000000,
300         (0xbe00 << 16) | (0xc900 >> 2),
301         0x00000000,
302         (0x4e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x5e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0x6e00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0x7e00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x8e00 << 16) | (0xc904 >> 2),
311         0x00000000,
312         (0x9e00 << 16) | (0xc904 >> 2),
313         0x00000000,
314         (0xae00 << 16) | (0xc904 >> 2),
315         0x00000000,
316         (0xbe00 << 16) | (0xc904 >> 2),
317         0x00000000,
318         (0x4e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x5e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0x6e00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0x7e00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x8e00 << 16) | (0xc908 >> 2),
327         0x00000000,
328         (0x9e00 << 16) | (0xc908 >> 2),
329         0x00000000,
330         (0xae00 << 16) | (0xc908 >> 2),
331         0x00000000,
332         (0xbe00 << 16) | (0xc908 >> 2),
333         0x00000000,
334         (0x4e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x5e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0x6e00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0x7e00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x8e00 << 16) | (0xc90c >> 2),
343         0x00000000,
344         (0x9e00 << 16) | (0xc90c >> 2),
345         0x00000000,
346         (0xae00 << 16) | (0xc90c >> 2),
347         0x00000000,
348         (0xbe00 << 16) | (0xc90c >> 2),
349         0x00000000,
350         (0x4e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x5e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0x6e00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0x7e00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x8e00 << 16) | (0xc910 >> 2),
359         0x00000000,
360         (0x9e00 << 16) | (0xc910 >> 2),
361         0x00000000,
362         (0xae00 << 16) | (0xc910 >> 2),
363         0x00000000,
364         (0xbe00 << 16) | (0xc910 >> 2),
365         0x00000000,
366         (0x0e00 << 16) | (0xc99c >> 2),
367         0x00000000,
368         (0x0e00 << 16) | (0x9834 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f00 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f00 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f04 >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f04 >> 2),
377         0x00000000,
378         (0x0000 << 16) | (0x30f08 >> 2),
379         0x00000000,
380         (0x0001 << 16) | (0x30f08 >> 2),
381         0x00000000,
382         (0x0000 << 16) | (0x30f0c >> 2),
383         0x00000000,
384         (0x0001 << 16) | (0x30f0c >> 2),
385         0x00000000,
386         (0x0600 << 16) | (0x9b7c >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8a14 >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8a18 >> 2),
391         0x00000000,
392         (0x0600 << 16) | (0x30a00 >> 2),
393         0x00000000,
394         (0x0e00 << 16) | (0x8bf0 >> 2),
395         0x00000000,
396         (0x0e00 << 16) | (0x8bcc >> 2),
397         0x00000000,
398         (0x0e00 << 16) | (0x8b24 >> 2),
399         0x00000000,
400         (0x0e00 << 16) | (0x30a04 >> 2),
401         0x00000000,
402         (0x0600 << 16) | (0x30a10 >> 2),
403         0x00000000,
404         (0x0600 << 16) | (0x30a14 >> 2),
405         0x00000000,
406         (0x0600 << 16) | (0x30a18 >> 2),
407         0x00000000,
408         (0x0600 << 16) | (0x30a2c >> 2),
409         0x00000000,
410         (0x0e00 << 16) | (0xc700 >> 2),
411         0x00000000,
412         (0x0e00 << 16) | (0xc704 >> 2),
413         0x00000000,
414         (0x0e00 << 16) | (0xc708 >> 2),
415         0x00000000,
416         (0x0e00 << 16) | (0xc768 >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc770 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc774 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc778 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc77c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc780 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc784 >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc788 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc78c >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc798 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc79c >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7a0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7a4 >> 2),
441         0x00000000,
442         (0x0400 << 16) | (0xc7a8 >> 2),
443         0x00000000,
444         (0x0400 << 16) | (0xc7ac >> 2),
445         0x00000000,
446         (0x0400 << 16) | (0xc7b0 >> 2),
447         0x00000000,
448         (0x0400 << 16) | (0xc7b4 >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x9100 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x3c010 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92a8 >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92ac >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92b4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92b8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92bc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92c0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x92c4 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x92c8 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x92cc >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x92d0 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c00 >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0x8c04 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x8c20 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0x8c38 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0x8c3c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xae00 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0x9604 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac08 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac0c >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac10 >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac14 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac58 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac68 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac6c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac70 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac74 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac78 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac7c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0xac80 >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0xac84 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0xac88 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0xac8c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x970c >> 2),
519         0x00000000,
520         (0x0e00 << 16) | (0x9714 >> 2),
521         0x00000000,
522         (0x0e00 << 16) | (0x9718 >> 2),
523         0x00000000,
524         (0x0e00 << 16) | (0x971c >> 2),
525         0x00000000,
526         (0x0e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x4e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x5e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0x6e00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0x7e00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x8e00 << 16) | (0x31068 >> 2),
537         0x00000000,
538         (0x9e00 << 16) | (0x31068 >> 2),
539         0x00000000,
540         (0xae00 << 16) | (0x31068 >> 2),
541         0x00000000,
542         (0xbe00 << 16) | (0x31068 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0xcd10 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0xcd14 >> 2),
547         0x00000000,
548         (0x0e00 << 16) | (0x88b0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88b4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88b8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88bc >> 2),
555         0x00000000,
556         (0x0400 << 16) | (0x89c0 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88c4 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x88c8 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x88d0 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x88d4 >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x88d8 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x8980 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30938 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x3093c >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x30940 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x89a0 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x30900 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x30904 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x89b4 >> 2),
583         0x00000000,
584         (0x0e00 << 16) | (0x3c210 >> 2),
585         0x00000000,
586         (0x0e00 << 16) | (0x3c214 >> 2),
587         0x00000000,
588         (0x0e00 << 16) | (0x3c218 >> 2),
589         0x00000000,
590         (0x0e00 << 16) | (0x8904 >> 2),
591         0x00000000,
592         0x5,
593         (0x0e00 << 16) | (0x8c28 >> 2),
594         (0x0e00 << 16) | (0x8c2c >> 2),
595         (0x0e00 << 16) | (0x8c30 >> 2),
596         (0x0e00 << 16) | (0x8c34 >> 2),
597         (0x0e00 << 16) | (0x9600 >> 2),
598 };
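
/*
 * Each entry in the save/restore lists above pairs a packed selector
 * word with a 0x00000000 placeholder for the saved value: the low half
 * of the selector is the register offset in dwords (hence the ">> 2"),
 * while the high half appears to pick the GRBM instance/broadcast
 * target the RLC should use.  The interspersed bare words (0x3, 0x5)
 * appear to delimit sub-blocks whose interpretation is left to the RLC
 * microcode that consumes the list.  A tiny illustrative decode
 * (hypothetical helper, not used by the driver):
 */
#if 0
static inline u32 cik_rlc_list_byte_offset_sketch(u32 selector)
{
        return (selector & 0xffff) << 2;        /* dwords back to a byte offset */
}
#endif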
599
600 static const u32 kalindi_rlc_save_restore_register_list[] =
601 {
602         (0x0e00 << 16) | (0xc12c >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc140 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc150 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc15c >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc168 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc170 >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc204 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0xc2b4 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0xc2b8 >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0xc2bc >> 2),
621         0x00000000,
622         (0x0e00 << 16) | (0xc2c0 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x8228 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x829c >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0x869c >> 2),
629         0x00000000,
630         (0x0600 << 16) | (0x98f4 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x98f8 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x9900 >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0xc260 >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x90e8 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0x3c000 >> 2),
641         0x00000000,
642         (0x0e00 << 16) | (0x3c00c >> 2),
643         0x00000000,
644         (0x0e00 << 16) | (0x8c1c >> 2),
645         0x00000000,
646         (0x0e00 << 16) | (0x9700 >> 2),
647         0x00000000,
648         (0x0e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x4e00 << 16) | (0xcd20 >> 2),
651         0x00000000,
652         (0x5e00 << 16) | (0xcd20 >> 2),
653         0x00000000,
654         (0x6e00 << 16) | (0xcd20 >> 2),
655         0x00000000,
656         (0x7e00 << 16) | (0xcd20 >> 2),
657         0x00000000,
658         (0x0e00 << 16) | (0x89bc >> 2),
659         0x00000000,
660         (0x0e00 << 16) | (0x8900 >> 2),
661         0x00000000,
662         0x3,
663         (0x0e00 << 16) | (0xc130 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc134 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc1fc >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc208 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc264 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc268 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc26c >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc270 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc274 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc28c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc290 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc294 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc298 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0xc2a0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc2a4 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc2a8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc2ac >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x301d0 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x30238 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x30250 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0x30254 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0x30258 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0x3025c >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc900 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc900 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc900 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc900 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc904 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc904 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc904 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc904 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc908 >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc908 >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc908 >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc908 >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc90c >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc90c >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc90c >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc90c >> 2),
740         0x00000000,
741         (0x4e00 << 16) | (0xc910 >> 2),
742         0x00000000,
743         (0x5e00 << 16) | (0xc910 >> 2),
744         0x00000000,
745         (0x6e00 << 16) | (0xc910 >> 2),
746         0x00000000,
747         (0x7e00 << 16) | (0xc910 >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0xc99c >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x9834 >> 2),
752         0x00000000,
753         (0x0000 << 16) | (0x30f00 >> 2),
754         0x00000000,
755         (0x0000 << 16) | (0x30f04 >> 2),
756         0x00000000,
757         (0x0000 << 16) | (0x30f08 >> 2),
758         0x00000000,
759         (0x0000 << 16) | (0x30f0c >> 2),
760         0x00000000,
761         (0x0600 << 16) | (0x9b7c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8a14 >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8a18 >> 2),
766         0x00000000,
767         (0x0600 << 16) | (0x30a00 >> 2),
768         0x00000000,
769         (0x0e00 << 16) | (0x8bf0 >> 2),
770         0x00000000,
771         (0x0e00 << 16) | (0x8bcc >> 2),
772         0x00000000,
773         (0x0e00 << 16) | (0x8b24 >> 2),
774         0x00000000,
775         (0x0e00 << 16) | (0x30a04 >> 2),
776         0x00000000,
777         (0x0600 << 16) | (0x30a10 >> 2),
778         0x00000000,
779         (0x0600 << 16) | (0x30a14 >> 2),
780         0x00000000,
781         (0x0600 << 16) | (0x30a18 >> 2),
782         0x00000000,
783         (0x0600 << 16) | (0x30a2c >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc700 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc704 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc708 >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc768 >> 2),
792         0x00000000,
793         (0x0400 << 16) | (0xc770 >> 2),
794         0x00000000,
795         (0x0400 << 16) | (0xc774 >> 2),
796         0x00000000,
797         (0x0400 << 16) | (0xc798 >> 2),
798         0x00000000,
799         (0x0400 << 16) | (0xc79c >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x9100 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x3c010 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c00 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0x8c04 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x8c20 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x8c38 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x8c3c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xae00 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x9604 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac08 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac0c >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac10 >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac14 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac58 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac68 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac6c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac70 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac74 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac78 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac7c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0xac80 >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0xac84 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0xac88 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0xac8c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x970c >> 2),
850         0x00000000,
851         (0x0e00 << 16) | (0x9714 >> 2),
852         0x00000000,
853         (0x0e00 << 16) | (0x9718 >> 2),
854         0x00000000,
855         (0x0e00 << 16) | (0x971c >> 2),
856         0x00000000,
857         (0x0e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x4e00 << 16) | (0x31068 >> 2),
860         0x00000000,
861         (0x5e00 << 16) | (0x31068 >> 2),
862         0x00000000,
863         (0x6e00 << 16) | (0x31068 >> 2),
864         0x00000000,
865         (0x7e00 << 16) | (0x31068 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0xcd10 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0xcd14 >> 2),
870         0x00000000,
871         (0x0e00 << 16) | (0x88b0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88b4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88b8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88bc >> 2),
878         0x00000000,
879         (0x0400 << 16) | (0x89c0 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88c4 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x88c8 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x88d0 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x88d4 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x88d8 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x8980 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30938 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x3093c >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x30940 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x89a0 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x30900 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x30904 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x89b4 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x3e1fc >> 2),
908         0x00000000,
909         (0x0e00 << 16) | (0x3c210 >> 2),
910         0x00000000,
911         (0x0e00 << 16) | (0x3c214 >> 2),
912         0x00000000,
913         (0x0e00 << 16) | (0x3c218 >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x8904 >> 2),
916         0x00000000,
917         0x5,
918         (0x0e00 << 16) | (0x8c28 >> 2),
919         (0x0e00 << 16) | (0x8c2c >> 2),
920         (0x0e00 << 16) | (0x8c30 >> 2),
921         (0x0e00 << 16) | (0x8c34 >> 2),
922         (0x0e00 << 16) | (0x9600 >> 2),
923 };
924
925 static const u32 bonaire_golden_spm_registers[] =
926 {
927         0x30800, 0xe0ffffff, 0xe0000000
928 };
929
930 static const u32 bonaire_golden_common_registers[] =
931 {
932         0xc770, 0xffffffff, 0x00000800,
933         0xc774, 0xffffffff, 0x00000800,
934         0xc798, 0xffffffff, 0x00007fbf,
935         0xc79c, 0xffffffff, 0x00007faf
936 };
937
938 static const u32 bonaire_golden_registers[] =
939 {
940         0x3354, 0x00000333, 0x00000333,
941         0x3350, 0x000c0fc0, 0x00040200,
942         0x9a10, 0x00010000, 0x00058208,
943         0x3c000, 0xffff1fff, 0x00140000,
944         0x3c200, 0xfdfc0fff, 0x00000100,
945         0x3c234, 0x40000000, 0x40000200,
946         0x9830, 0xffffffff, 0x00000000,
947         0x9834, 0xf00fffff, 0x00000400,
948         0x9838, 0x0002021c, 0x00020200,
949         0xc78, 0x00000080, 0x00000000,
950         0x5bb0, 0x000000f0, 0x00000070,
951         0x5bc0, 0xf0311fff, 0x80300000,
952         0x98f8, 0x73773777, 0x12010001,
953         0x350c, 0x00810000, 0x408af000,
954         0x7030, 0x31000111, 0x00000011,
955         0x2f48, 0x73773777, 0x12010001,
956         0x220c, 0x00007fb6, 0x0021a1b1,
957         0x2210, 0x00007fb6, 0x002021b1,
958         0x2180, 0x00007fb6, 0x00002191,
959         0x2218, 0x00007fb6, 0x002121b1,
960         0x221c, 0x00007fb6, 0x002021b1,
961         0x21dc, 0x00007fb6, 0x00002191,
962         0x21e0, 0x00007fb6, 0x00002191,
963         0x3628, 0x0000003f, 0x0000000a,
964         0x362c, 0x0000003f, 0x0000000a,
965         0x2ae4, 0x00073ffe, 0x000022a2,
966         0x240c, 0x000007ff, 0x00000000,
967         0x8a14, 0xf000003f, 0x00000007,
968         0x8bf0, 0x00002001, 0x00000001,
969         0x8b24, 0xffffffff, 0x00ffffff,
970         0x30a04, 0x0000ff0f, 0x00000000,
971         0x28a4c, 0x07ffffff, 0x06000000,
972         0x4d8, 0x00000fff, 0x00000100,
973         0x3e78, 0x00000001, 0x00000002,
974         0x9100, 0x03000000, 0x0362c688,
975         0x8c00, 0x000000ff, 0x00000001,
976         0xe40, 0x00001fff, 0x00001fff,
977         0x9060, 0x0000007f, 0x00000020,
978         0x9508, 0x00010000, 0x00010000,
979         0xac14, 0x000003ff, 0x000000f3,
980         0xac0c, 0xffffffff, 0x00001032
981 };
982
983 static const u32 bonaire_mgcg_cgcg_init[] =
984 {
985         0xc420, 0xffffffff, 0xfffffffc,
986         0x30800, 0xffffffff, 0xe0000000,
987         0x3c2a0, 0xffffffff, 0x00000100,
988         0x3c208, 0xffffffff, 0x00000100,
989         0x3c2c0, 0xffffffff, 0xc0000100,
990         0x3c2c8, 0xffffffff, 0xc0000100,
991         0x3c2c4, 0xffffffff, 0xc0000100,
992         0x55e4, 0xffffffff, 0x00600100,
993         0x3c280, 0xffffffff, 0x00000100,
994         0x3c214, 0xffffffff, 0x06000100,
995         0x3c220, 0xffffffff, 0x00000100,
996         0x3c218, 0xffffffff, 0x06000100,
997         0x3c204, 0xffffffff, 0x00000100,
998         0x3c2e0, 0xffffffff, 0x00000100,
999         0x3c224, 0xffffffff, 0x00000100,
1000         0x3c200, 0xffffffff, 0x00000100,
1001         0x3c230, 0xffffffff, 0x00000100,
1002         0x3c234, 0xffffffff, 0x00000100,
1003         0x3c250, 0xffffffff, 0x00000100,
1004         0x3c254, 0xffffffff, 0x00000100,
1005         0x3c258, 0xffffffff, 0x00000100,
1006         0x3c25c, 0xffffffff, 0x00000100,
1007         0x3c260, 0xffffffff, 0x00000100,
1008         0x3c27c, 0xffffffff, 0x00000100,
1009         0x3c278, 0xffffffff, 0x00000100,
1010         0x3c210, 0xffffffff, 0x06000100,
1011         0x3c290, 0xffffffff, 0x00000100,
1012         0x3c274, 0xffffffff, 0x00000100,
1013         0x3c2b4, 0xffffffff, 0x00000100,
1014         0x3c2b0, 0xffffffff, 0x00000100,
1015         0x3c270, 0xffffffff, 0x00000100,
1016         0x30800, 0xffffffff, 0xe0000000,
1017         0x3c020, 0xffffffff, 0x00010000,
1018         0x3c024, 0xffffffff, 0x00030002,
1019         0x3c028, 0xffffffff, 0x00040007,
1020         0x3c02c, 0xffffffff, 0x00060005,
1021         0x3c030, 0xffffffff, 0x00090008,
1022         0x3c034, 0xffffffff, 0x00010000,
1023         0x3c038, 0xffffffff, 0x00030002,
1024         0x3c03c, 0xffffffff, 0x00040007,
1025         0x3c040, 0xffffffff, 0x00060005,
1026         0x3c044, 0xffffffff, 0x00090008,
1027         0x3c048, 0xffffffff, 0x00010000,
1028         0x3c04c, 0xffffffff, 0x00030002,
1029         0x3c050, 0xffffffff, 0x00040007,
1030         0x3c054, 0xffffffff, 0x00060005,
1031         0x3c058, 0xffffffff, 0x00090008,
1032         0x3c05c, 0xffffffff, 0x00010000,
1033         0x3c060, 0xffffffff, 0x00030002,
1034         0x3c064, 0xffffffff, 0x00040007,
1035         0x3c068, 0xffffffff, 0x00060005,
1036         0x3c06c, 0xffffffff, 0x00090008,
1037         0x3c070, 0xffffffff, 0x00010000,
1038         0x3c074, 0xffffffff, 0x00030002,
1039         0x3c078, 0xffffffff, 0x00040007,
1040         0x3c07c, 0xffffffff, 0x00060005,
1041         0x3c080, 0xffffffff, 0x00090008,
1042         0x3c084, 0xffffffff, 0x00010000,
1043         0x3c088, 0xffffffff, 0x00030002,
1044         0x3c08c, 0xffffffff, 0x00040007,
1045         0x3c090, 0xffffffff, 0x00060005,
1046         0x3c094, 0xffffffff, 0x00090008,
1047         0x3c098, 0xffffffff, 0x00010000,
1048         0x3c09c, 0xffffffff, 0x00030002,
1049         0x3c0a0, 0xffffffff, 0x00040007,
1050         0x3c0a4, 0xffffffff, 0x00060005,
1051         0x3c0a8, 0xffffffff, 0x00090008,
1052         0x3c000, 0xffffffff, 0x96e00200,
1053         0x8708, 0xffffffff, 0x00900100,
1054         0xc424, 0xffffffff, 0x0020003f,
1055         0x38, 0xffffffff, 0x0140001c,
1056         0x3c, 0x000f0000, 0x000f0000,
1057         0x220, 0xffffffff, 0xC060000C,
1058         0x224, 0xc0000fff, 0x00000100,
1059         0xf90, 0xffffffff, 0x00000100,
1060         0xf98, 0x00000101, 0x00000000,
1061         0x20a8, 0xffffffff, 0x00000104,
1062         0x55e4, 0xff000fff, 0x00000100,
1063         0x30cc, 0xc0000fff, 0x00000104,
1064         0xc1e4, 0x00000001, 0x00000001,
1065         0xd00c, 0xff000ff0, 0x00000100,
1066         0xd80c, 0xff000ff0, 0x00000100
1067 };
1068
1069 static const u32 spectre_golden_spm_registers[] =
1070 {
1071         0x30800, 0xe0ffffff, 0xe0000000
1072 };
1073
1074 static const u32 spectre_golden_common_registers[] =
1075 {
1076         0xc770, 0xffffffff, 0x00000800,
1077         0xc774, 0xffffffff, 0x00000800,
1078         0xc798, 0xffffffff, 0x00007fbf,
1079         0xc79c, 0xffffffff, 0x00007faf
1080 };
1081
1082 static const u32 spectre_golden_registers[] =
1083 {
1084         0x3c000, 0xffff1fff, 0x96940200,
1085         0x3c00c, 0xffff0001, 0xff000000,
1086         0x3c200, 0xfffc0fff, 0x00000100,
1087         0x6ed8, 0x00010101, 0x00010000,
1088         0x9834, 0xf00fffff, 0x00000400,
1089         0x9838, 0xfffffffc, 0x00020200,
1090         0x5bb0, 0x000000f0, 0x00000070,
1091         0x5bc0, 0xf0311fff, 0x80300000,
1092         0x98f8, 0x73773777, 0x12010001,
1093         0x9b7c, 0x00ff0000, 0x00fc0000,
1094         0x2f48, 0x73773777, 0x12010001,
1095         0x8a14, 0xf000003f, 0x00000007,
1096         0x8b24, 0xffffffff, 0x00ffffff,
1097         0x28350, 0x3f3f3fff, 0x00000082,
1098         0x28354, 0x0000003f, 0x00000000,
1099         0x3e78, 0x00000001, 0x00000002,
1100         0x913c, 0xffff03df, 0x00000004,
1101         0xc768, 0x00000008, 0x00000008,
1102         0x8c00, 0x000008ff, 0x00000800,
1103         0x9508, 0x00010000, 0x00010000,
1104         0xac0c, 0xffffffff, 0x54763210,
1105         0x214f8, 0x01ff01ff, 0x00000002,
1106         0x21498, 0x007ff800, 0x00200000,
1107         0x2015c, 0xffffffff, 0x00000f40,
1108         0x30934, 0xffffffff, 0x00000001
1109 };
1110
1111 static const u32 spectre_mgcg_cgcg_init[] =
1112 {
1113         0xc420, 0xffffffff, 0xfffffffc,
1114         0x30800, 0xffffffff, 0xe0000000,
1115         0x3c2a0, 0xffffffff, 0x00000100,
1116         0x3c208, 0xffffffff, 0x00000100,
1117         0x3c2c0, 0xffffffff, 0x00000100,
1118         0x3c2c8, 0xffffffff, 0x00000100,
1119         0x3c2c4, 0xffffffff, 0x00000100,
1120         0x55e4, 0xffffffff, 0x00600100,
1121         0x3c280, 0xffffffff, 0x00000100,
1122         0x3c214, 0xffffffff, 0x06000100,
1123         0x3c220, 0xffffffff, 0x00000100,
1124         0x3c218, 0xffffffff, 0x06000100,
1125         0x3c204, 0xffffffff, 0x00000100,
1126         0x3c2e0, 0xffffffff, 0x00000100,
1127         0x3c224, 0xffffffff, 0x00000100,
1128         0x3c200, 0xffffffff, 0x00000100,
1129         0x3c230, 0xffffffff, 0x00000100,
1130         0x3c234, 0xffffffff, 0x00000100,
1131         0x3c250, 0xffffffff, 0x00000100,
1132         0x3c254, 0xffffffff, 0x00000100,
1133         0x3c258, 0xffffffff, 0x00000100,
1134         0x3c25c, 0xffffffff, 0x00000100,
1135         0x3c260, 0xffffffff, 0x00000100,
1136         0x3c27c, 0xffffffff, 0x00000100,
1137         0x3c278, 0xffffffff, 0x00000100,
1138         0x3c210, 0xffffffff, 0x06000100,
1139         0x3c290, 0xffffffff, 0x00000100,
1140         0x3c274, 0xffffffff, 0x00000100,
1141         0x3c2b4, 0xffffffff, 0x00000100,
1142         0x3c2b0, 0xffffffff, 0x00000100,
1143         0x3c270, 0xffffffff, 0x00000100,
1144         0x30800, 0xffffffff, 0xe0000000,
1145         0x3c020, 0xffffffff, 0x00010000,
1146         0x3c024, 0xffffffff, 0x00030002,
1147         0x3c028, 0xffffffff, 0x00040007,
1148         0x3c02c, 0xffffffff, 0x00060005,
1149         0x3c030, 0xffffffff, 0x00090008,
1150         0x3c034, 0xffffffff, 0x00010000,
1151         0x3c038, 0xffffffff, 0x00030002,
1152         0x3c03c, 0xffffffff, 0x00040007,
1153         0x3c040, 0xffffffff, 0x00060005,
1154         0x3c044, 0xffffffff, 0x00090008,
1155         0x3c048, 0xffffffff, 0x00010000,
1156         0x3c04c, 0xffffffff, 0x00030002,
1157         0x3c050, 0xffffffff, 0x00040007,
1158         0x3c054, 0xffffffff, 0x00060005,
1159         0x3c058, 0xffffffff, 0x00090008,
1160         0x3c05c, 0xffffffff, 0x00010000,
1161         0x3c060, 0xffffffff, 0x00030002,
1162         0x3c064, 0xffffffff, 0x00040007,
1163         0x3c068, 0xffffffff, 0x00060005,
1164         0x3c06c, 0xffffffff, 0x00090008,
1165         0x3c070, 0xffffffff, 0x00010000,
1166         0x3c074, 0xffffffff, 0x00030002,
1167         0x3c078, 0xffffffff, 0x00040007,
1168         0x3c07c, 0xffffffff, 0x00060005,
1169         0x3c080, 0xffffffff, 0x00090008,
1170         0x3c084, 0xffffffff, 0x00010000,
1171         0x3c088, 0xffffffff, 0x00030002,
1172         0x3c08c, 0xffffffff, 0x00040007,
1173         0x3c090, 0xffffffff, 0x00060005,
1174         0x3c094, 0xffffffff, 0x00090008,
1175         0x3c098, 0xffffffff, 0x00010000,
1176         0x3c09c, 0xffffffff, 0x00030002,
1177         0x3c0a0, 0xffffffff, 0x00040007,
1178         0x3c0a4, 0xffffffff, 0x00060005,
1179         0x3c0a8, 0xffffffff, 0x00090008,
1180         0x3c0ac, 0xffffffff, 0x00010000,
1181         0x3c0b0, 0xffffffff, 0x00030002,
1182         0x3c0b4, 0xffffffff, 0x00040007,
1183         0x3c0b8, 0xffffffff, 0x00060005,
1184         0x3c0bc, 0xffffffff, 0x00090008,
1185         0x3c000, 0xffffffff, 0x96e00200,
1186         0x8708, 0xffffffff, 0x00900100,
1187         0xc424, 0xffffffff, 0x0020003f,
1188         0x38, 0xffffffff, 0x0140001c,
1189         0x3c, 0x000f0000, 0x000f0000,
1190         0x220, 0xffffffff, 0xC060000C,
1191         0x224, 0xc0000fff, 0x00000100,
1192         0xf90, 0xffffffff, 0x00000100,
1193         0xf98, 0x00000101, 0x00000000,
1194         0x20a8, 0xffffffff, 0x00000104,
1195         0x55e4, 0xff000fff, 0x00000100,
1196         0x30cc, 0xc0000fff, 0x00000104,
1197         0xc1e4, 0x00000001, 0x00000001,
1198         0xd00c, 0xff000ff0, 0x00000100,
1199         0xd80c, 0xff000ff0, 0x00000100
1200 };
1201
1202 static const u32 kalindi_golden_spm_registers[] =
1203 {
1204         0x30800, 0xe0ffffff, 0xe0000000
1205 };
1206
1207 static const u32 kalindi_golden_common_registers[] =
1208 {
1209         0xc770, 0xffffffff, 0x00000800,
1210         0xc774, 0xffffffff, 0x00000800,
1211         0xc798, 0xffffffff, 0x00007fbf,
1212         0xc79c, 0xffffffff, 0x00007faf
1213 };
1214
1215 static const u32 kalindi_golden_registers[] =
1216 {
1217         0x3c000, 0xffffdfff, 0x6e944040,
1218         0x55e4, 0xff607fff, 0xfc000100,
1219         0x3c220, 0xff000fff, 0x00000100,
1220         0x3c224, 0xff000fff, 0x00000100,
1221         0x3c200, 0xfffc0fff, 0x00000100,
1222         0x6ed8, 0x00010101, 0x00010000,
1223         0x9830, 0xffffffff, 0x00000000,
1224         0x9834, 0xf00fffff, 0x00000400,
1225         0x5bb0, 0x000000f0, 0x00000070,
1226         0x5bc0, 0xf0311fff, 0x80300000,
1227         0x98f8, 0x73773777, 0x12010001,
1228         0x98fc, 0xffffffff, 0x00000010,
1229         0x9b7c, 0x00ff0000, 0x00fc0000,
1230         0x8030, 0x00001f0f, 0x0000100a,
1231         0x2f48, 0x73773777, 0x12010001,
1232         0x2408, 0x000fffff, 0x000c007f,
1233         0x8a14, 0xf000003f, 0x00000007,
1234         0x8b24, 0x3fff3fff, 0x00ffcfff,
1235         0x30a04, 0x0000ff0f, 0x00000000,
1236         0x28a4c, 0x07ffffff, 0x06000000,
1237         0x4d8, 0x00000fff, 0x00000100,
1238         0x3e78, 0x00000001, 0x00000002,
1239         0xc768, 0x00000008, 0x00000008,
1240         0x8c00, 0x000000ff, 0x00000003,
1241         0x214f8, 0x01ff01ff, 0x00000002,
1242         0x21498, 0x007ff800, 0x00200000,
1243         0x2015c, 0xffffffff, 0x00000f40,
1244         0x88c4, 0x001f3ae3, 0x00000082,
1245         0x88d4, 0x0000001f, 0x00000010,
1246         0x30934, 0xffffffff, 0x00000000
1247 };
1248
1249 static const u32 kalindi_mgcg_cgcg_init[] =
1250 {
1251         0xc420, 0xffffffff, 0xfffffffc,
1252         0x30800, 0xffffffff, 0xe0000000,
1253         0x3c2a0, 0xffffffff, 0x00000100,
1254         0x3c208, 0xffffffff, 0x00000100,
1255         0x3c2c0, 0xffffffff, 0x00000100,
1256         0x3c2c8, 0xffffffff, 0x00000100,
1257         0x3c2c4, 0xffffffff, 0x00000100,
1258         0x55e4, 0xffffffff, 0x00600100,
1259         0x3c280, 0xffffffff, 0x00000100,
1260         0x3c214, 0xffffffff, 0x06000100,
1261         0x3c220, 0xffffffff, 0x00000100,
1262         0x3c218, 0xffffffff, 0x06000100,
1263         0x3c204, 0xffffffff, 0x00000100,
1264         0x3c2e0, 0xffffffff, 0x00000100,
1265         0x3c224, 0xffffffff, 0x00000100,
1266         0x3c200, 0xffffffff, 0x00000100,
1267         0x3c230, 0xffffffff, 0x00000100,
1268         0x3c234, 0xffffffff, 0x00000100,
1269         0x3c250, 0xffffffff, 0x00000100,
1270         0x3c254, 0xffffffff, 0x00000100,
1271         0x3c258, 0xffffffff, 0x00000100,
1272         0x3c25c, 0xffffffff, 0x00000100,
1273         0x3c260, 0xffffffff, 0x00000100,
1274         0x3c27c, 0xffffffff, 0x00000100,
1275         0x3c278, 0xffffffff, 0x00000100,
1276         0x3c210, 0xffffffff, 0x06000100,
1277         0x3c290, 0xffffffff, 0x00000100,
1278         0x3c274, 0xffffffff, 0x00000100,
1279         0x3c2b4, 0xffffffff, 0x00000100,
1280         0x3c2b0, 0xffffffff, 0x00000100,
1281         0x3c270, 0xffffffff, 0x00000100,
1282         0x30800, 0xffffffff, 0xe0000000,
1283         0x3c020, 0xffffffff, 0x00010000,
1284         0x3c024, 0xffffffff, 0x00030002,
1285         0x3c028, 0xffffffff, 0x00040007,
1286         0x3c02c, 0xffffffff, 0x00060005,
1287         0x3c030, 0xffffffff, 0x00090008,
1288         0x3c034, 0xffffffff, 0x00010000,
1289         0x3c038, 0xffffffff, 0x00030002,
1290         0x3c03c, 0xffffffff, 0x00040007,
1291         0x3c040, 0xffffffff, 0x00060005,
1292         0x3c044, 0xffffffff, 0x00090008,
1293         0x3c000, 0xffffffff, 0x96e00200,
1294         0x8708, 0xffffffff, 0x00900100,
1295         0xc424, 0xffffffff, 0x0020003f,
1296         0x38, 0xffffffff, 0x0140001c,
1297         0x3c, 0x000f0000, 0x000f0000,
1298         0x220, 0xffffffff, 0xC060000C,
1299         0x224, 0xc0000fff, 0x00000100,
1300         0x20a8, 0xffffffff, 0x00000104,
1301         0x55e4, 0xff000fff, 0x00000100,
1302         0x30cc, 0xc0000fff, 0x00000104,
1303         0xc1e4, 0x00000001, 0x00000001,
1304         0xd00c, 0xff000ff0, 0x00000100,
1305         0xd80c, 0xff000ff0, 0x00000100
1306 };
1307
1308 static const u32 hawaii_golden_spm_registers[] =
1309 {
1310         0x30800, 0xe0ffffff, 0xe0000000
1311 };
1312
1313 static const u32 hawaii_golden_common_registers[] =
1314 {
1315         0x30800, 0xffffffff, 0xe0000000,
1316         0x28350, 0xffffffff, 0x3a00161a,
1317         0x28354, 0xffffffff, 0x0000002e,
1318         0x9a10, 0xffffffff, 0x00018208,
1319         0x98f8, 0xffffffff, 0x12011003
1320 };
1321
1322 static const u32 hawaii_golden_registers[] =
1323 {
1324         0x3354, 0x00000333, 0x00000333,
1325         0x9a10, 0x00010000, 0x00058208,
1326         0x9830, 0xffffffff, 0x00000000,
1327         0x9834, 0xf00fffff, 0x00000400,
1328         0x9838, 0x0002021c, 0x00020200,
1329         0xc78, 0x00000080, 0x00000000,
1330         0x5bb0, 0x000000f0, 0x00000070,
1331         0x5bc0, 0xf0311fff, 0x80300000,
1332         0x350c, 0x00810000, 0x408af000,
1333         0x7030, 0x31000111, 0x00000011,
1334         0x2f48, 0x73773777, 0x12010001,
1335         0x2120, 0x0000007f, 0x0000001b,
1336         0x21dc, 0x00007fb6, 0x00002191,
1337         0x3628, 0x0000003f, 0x0000000a,
1338         0x362c, 0x0000003f, 0x0000000a,
1339         0x2ae4, 0x00073ffe, 0x000022a2,
1340         0x240c, 0x000007ff, 0x00000000,
1341         0x8bf0, 0x00002001, 0x00000001,
1342         0x8b24, 0xffffffff, 0x00ffffff,
1343         0x30a04, 0x0000ff0f, 0x00000000,
1344         0x28a4c, 0x07ffffff, 0x06000000,
1345         0x3e78, 0x00000001, 0x00000002,
1346         0xc768, 0x00000008, 0x00000008,
1347         0xc770, 0x00000f00, 0x00000800,
1348         0xc774, 0x00000f00, 0x00000800,
1349         0xc798, 0x00ffffff, 0x00ff7fbf,
1350         0xc79c, 0x00ffffff, 0x00ff7faf,
1351         0x8c00, 0x000000ff, 0x00000800,
1352         0xe40, 0x00001fff, 0x00001fff,
1353         0x9060, 0x0000007f, 0x00000020,
1354         0x9508, 0x00010000, 0x00010000,
1355         0xae00, 0x00100000, 0x000ff07c,
1356         0xac14, 0x000003ff, 0x0000000f,
1357         0xac10, 0xffffffff, 0x7564fdec,
1358         0xac0c, 0xffffffff, 0x3120b9a8,
1359         0xac08, 0x20000000, 0x0f9c0000
1360 };
1361
1362 static const u32 hawaii_mgcg_cgcg_init[] =
1363 {
1364         0xc420, 0xffffffff, 0xfffffffd,
1365         0x30800, 0xffffffff, 0xe0000000,
1366         0x3c2a0, 0xffffffff, 0x00000100,
1367         0x3c208, 0xffffffff, 0x00000100,
1368         0x3c2c0, 0xffffffff, 0x00000100,
1369         0x3c2c8, 0xffffffff, 0x00000100,
1370         0x3c2c4, 0xffffffff, 0x00000100,
1371         0x55e4, 0xffffffff, 0x00200100,
1372         0x3c280, 0xffffffff, 0x00000100,
1373         0x3c214, 0xffffffff, 0x06000100,
1374         0x3c220, 0xffffffff, 0x00000100,
1375         0x3c218, 0xffffffff, 0x06000100,
1376         0x3c204, 0xffffffff, 0x00000100,
1377         0x3c2e0, 0xffffffff, 0x00000100,
1378         0x3c224, 0xffffffff, 0x00000100,
1379         0x3c200, 0xffffffff, 0x00000100,
1380         0x3c230, 0xffffffff, 0x00000100,
1381         0x3c234, 0xffffffff, 0x00000100,
1382         0x3c250, 0xffffffff, 0x00000100,
1383         0x3c254, 0xffffffff, 0x00000100,
1384         0x3c258, 0xffffffff, 0x00000100,
1385         0x3c25c, 0xffffffff, 0x00000100,
1386         0x3c260, 0xffffffff, 0x00000100,
1387         0x3c27c, 0xffffffff, 0x00000100,
1388         0x3c278, 0xffffffff, 0x00000100,
1389         0x3c210, 0xffffffff, 0x06000100,
1390         0x3c290, 0xffffffff, 0x00000100,
1391         0x3c274, 0xffffffff, 0x00000100,
1392         0x3c2b4, 0xffffffff, 0x00000100,
1393         0x3c2b0, 0xffffffff, 0x00000100,
1394         0x3c270, 0xffffffff, 0x00000100,
1395         0x30800, 0xffffffff, 0xe0000000,
1396         0x3c020, 0xffffffff, 0x00010000,
1397         0x3c024, 0xffffffff, 0x00030002,
1398         0x3c028, 0xffffffff, 0x00040007,
1399         0x3c02c, 0xffffffff, 0x00060005,
1400         0x3c030, 0xffffffff, 0x00090008,
1401         0x3c034, 0xffffffff, 0x00010000,
1402         0x3c038, 0xffffffff, 0x00030002,
1403         0x3c03c, 0xffffffff, 0x00040007,
1404         0x3c040, 0xffffffff, 0x00060005,
1405         0x3c044, 0xffffffff, 0x00090008,
1406         0x3c048, 0xffffffff, 0x00010000,
1407         0x3c04c, 0xffffffff, 0x00030002,
1408         0x3c050, 0xffffffff, 0x00040007,
1409         0x3c054, 0xffffffff, 0x00060005,
1410         0x3c058, 0xffffffff, 0x00090008,
1411         0x3c05c, 0xffffffff, 0x00010000,
1412         0x3c060, 0xffffffff, 0x00030002,
1413         0x3c064, 0xffffffff, 0x00040007,
1414         0x3c068, 0xffffffff, 0x00060005,
1415         0x3c06c, 0xffffffff, 0x00090008,
1416         0x3c070, 0xffffffff, 0x00010000,
1417         0x3c074, 0xffffffff, 0x00030002,
1418         0x3c078, 0xffffffff, 0x00040007,
1419         0x3c07c, 0xffffffff, 0x00060005,
1420         0x3c080, 0xffffffff, 0x00090008,
1421         0x3c084, 0xffffffff, 0x00010000,
1422         0x3c088, 0xffffffff, 0x00030002,
1423         0x3c08c, 0xffffffff, 0x00040007,
1424         0x3c090, 0xffffffff, 0x00060005,
1425         0x3c094, 0xffffffff, 0x00090008,
1426         0x3c098, 0xffffffff, 0x00010000,
1427         0x3c09c, 0xffffffff, 0x00030002,
1428         0x3c0a0, 0xffffffff, 0x00040007,
1429         0x3c0a4, 0xffffffff, 0x00060005,
1430         0x3c0a8, 0xffffffff, 0x00090008,
1431         0x3c0ac, 0xffffffff, 0x00010000,
1432         0x3c0b0, 0xffffffff, 0x00030002,
1433         0x3c0b4, 0xffffffff, 0x00040007,
1434         0x3c0b8, 0xffffffff, 0x00060005,
1435         0x3c0bc, 0xffffffff, 0x00090008,
1436         0x3c0c0, 0xffffffff, 0x00010000,
1437         0x3c0c4, 0xffffffff, 0x00030002,
1438         0x3c0c8, 0xffffffff, 0x00040007,
1439         0x3c0cc, 0xffffffff, 0x00060005,
1440         0x3c0d0, 0xffffffff, 0x00090008,
1441         0x3c0d4, 0xffffffff, 0x00010000,
1442         0x3c0d8, 0xffffffff, 0x00030002,
1443         0x3c0dc, 0xffffffff, 0x00040007,
1444         0x3c0e0, 0xffffffff, 0x00060005,
1445         0x3c0e4, 0xffffffff, 0x00090008,
1446         0x3c0e8, 0xffffffff, 0x00010000,
1447         0x3c0ec, 0xffffffff, 0x00030002,
1448         0x3c0f0, 0xffffffff, 0x00040007,
1449         0x3c0f4, 0xffffffff, 0x00060005,
1450         0x3c0f8, 0xffffffff, 0x00090008,
1451         0xc318, 0xffffffff, 0x00020200,
1452         0x3350, 0xffffffff, 0x00000200,
1453         0x15c0, 0xffffffff, 0x00000400,
1454         0x55e8, 0xffffffff, 0x00000000,
1455         0x2f50, 0xffffffff, 0x00000902,
1456         0x3c000, 0xffffffff, 0x96940200,
1457         0x8708, 0xffffffff, 0x00900100,
1458         0xc424, 0xffffffff, 0x0020003f,
1459         0x38, 0xffffffff, 0x0140001c,
1460         0x3c, 0x000f0000, 0x000f0000,
1461         0x220, 0xffffffff, 0xc060000c,
1462         0x224, 0xc0000fff, 0x00000100,
1463         0xf90, 0xffffffff, 0x00000100,
1464         0xf98, 0x00000101, 0x00000000,
1465         0x20a8, 0xffffffff, 0x00000104,
1466         0x55e4, 0xff000fff, 0x00000100,
1467         0x30cc, 0xc0000fff, 0x00000104,
1468         0xc1e4, 0x00000001, 0x00000001,
1469         0xd00c, 0xff000ff0, 0x00000100,
1470         0xd80c, 0xff000ff0, 0x00000100
1471 };
1472
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475         switch (rdev->family) {
1476         case CHIP_BONAIRE:
1477                 radeon_program_register_sequence(rdev,
1478                                                  bonaire_mgcg_cgcg_init,
1479                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480                 radeon_program_register_sequence(rdev,
1481                                                  bonaire_golden_registers,
1482                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483                 radeon_program_register_sequence(rdev,
1484                                                  bonaire_golden_common_registers,
1485                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486                 radeon_program_register_sequence(rdev,
1487                                                  bonaire_golden_spm_registers,
1488                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489                 break;
1490         case CHIP_KABINI:
1491                 radeon_program_register_sequence(rdev,
1492                                                  kalindi_mgcg_cgcg_init,
1493                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494                 radeon_program_register_sequence(rdev,
1495                                                  kalindi_golden_registers,
1496                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497                 radeon_program_register_sequence(rdev,
1498                                                  kalindi_golden_common_registers,
1499                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500                 radeon_program_register_sequence(rdev,
1501                                                  kalindi_golden_spm_registers,
1502                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503                 break;
1504         case CHIP_KAVERI:
1505                 radeon_program_register_sequence(rdev,
1506                                                  spectre_mgcg_cgcg_init,
1507                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508                 radeon_program_register_sequence(rdev,
1509                                                  spectre_golden_registers,
1510                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1511                 radeon_program_register_sequence(rdev,
1512                                                  spectre_golden_common_registers,
1513                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514                 radeon_program_register_sequence(rdev,
1515                                                  spectre_golden_spm_registers,
1516                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517                 break;
1518         case CHIP_HAWAII:
1519                 radeon_program_register_sequence(rdev,
1520                                                  hawaii_mgcg_cgcg_init,
1521                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522                 radeon_program_register_sequence(rdev,
1523                                                  hawaii_golden_registers,
1524                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525                 radeon_program_register_sequence(rdev,
1526                                                  hawaii_golden_common_registers,
1527                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528                 radeon_program_register_sequence(rdev,
1529                                                  hawaii_golden_spm_registers,
1530                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531                 break;
1532         default:
1533                 break;
1534         }
1535 }
1536
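/*
 * A minimal reading of the tables above, assuming the usual
 * radeon_program_register_sequence() semantics: each entry is an
 * {offset, and_mask, or_value} triplet applied read-modify-write
 * (or as a straight write when the mask is 0xffffffff) for the
 * matching family before the rest of the hw init runs.
 */
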
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548
1549         if (rdev->flags & RADEON_IS_IGP) {
1550                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551                         return reference_clock / 2;
1552         } else {
1553                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554                         return reference_clock / 4;
1555         }
1556         return reference_clock;
1557 }
1558
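/*
 * Worked example with a hypothetical value (same units as
 * rdev->clock.spll.reference_freq): for reference_freq == 2700, a dGPU
 * with XTALIN_DIVIDE set gets 675 back, an IGP with GPU_COUNTER_CLK set
 * gets 1350, and everything else gets the raw 2700.
 */
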
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570         if (index < rdev->doorbell.num_doorbells) {
1571                 return readl(rdev->doorbell.ptr + index);
1572         } else {
1573                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574                 return 0;
1575         }
1576 }
1577
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590         if (index < rdev->doorbell.num_doorbells) {
1591                 writel(v, rdev->doorbell.ptr + index);
1592         } else {
1593                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594         }
1595 }
1596
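/*
 * Usage sketch (illustrative only, not a callsite in this file): a
 * compute queue that has been assigned doorbell slot "idx" would
 * publish a new write pointer and read it back with
 *
 *	cik_mm_wdoorbell(rdev, idx, wptr);
 *	wptr = cik_mm_rdoorbell(rdev, idx);
 *
 * Indices at or beyond rdev->doorbell.num_doorbells are rejected by
 * both helpers with a DRM_ERROR.
 */
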
1597 #define BONAIRE_IO_MC_REGS_SIZE 36
1598
1599 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1600 {
1601         {0x00000070, 0x04400000},
1602         {0x00000071, 0x80c01803},
1603         {0x00000072, 0x00004004},
1604         {0x00000073, 0x00000100},
1605         {0x00000074, 0x00ff0000},
1606         {0x00000075, 0x34000000},
1607         {0x00000076, 0x08000014},
1608         {0x00000077, 0x00cc08ec},
1609         {0x00000078, 0x00000400},
1610         {0x00000079, 0x00000000},
1611         {0x0000007a, 0x04090000},
1612         {0x0000007c, 0x00000000},
1613         {0x0000007e, 0x4408a8e8},
1614         {0x0000007f, 0x00000304},
1615         {0x00000080, 0x00000000},
1616         {0x00000082, 0x00000001},
1617         {0x00000083, 0x00000002},
1618         {0x00000084, 0xf3e4f400},
1619         {0x00000085, 0x052024e3},
1620         {0x00000087, 0x00000000},
1621         {0x00000088, 0x01000000},
1622         {0x0000008a, 0x1c0a0000},
1623         {0x0000008b, 0xff010000},
1624         {0x0000008d, 0xffffefff},
1625         {0x0000008e, 0xfff3efff},
1626         {0x0000008f, 0xfff3efbf},
1627         {0x00000092, 0xf7ffffff},
1628         {0x00000093, 0xffffff7f},
1629         {0x00000095, 0x00101101},
1630         {0x00000096, 0x00000fff},
1631         {0x00000097, 0x00116fff},
1632         {0x00000098, 0x60010000},
1633         {0x00000099, 0x10010000},
1634         {0x0000009a, 0x00006000},
1635         {0x0000009b, 0x00001000},
1636         {0x0000009f, 0x00b48000}
1637 };
1638
1639 #define HAWAII_IO_MC_REGS_SIZE 22
1640
1641 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1642 {
1643         {0x0000007d, 0x40000000},
1644         {0x0000007e, 0x40180304},
1645         {0x0000007f, 0x0000ff00},
1646         {0x00000081, 0x00000000},
1647         {0x00000083, 0x00000800},
1648         {0x00000086, 0x00000000},
1649         {0x00000087, 0x00000100},
1650         {0x00000088, 0x00020100},
1651         {0x00000089, 0x00000000},
1652         {0x0000008b, 0x00040000},
1653         {0x0000008c, 0x00000100},
1654         {0x0000008e, 0xff010000},
1655         {0x00000090, 0xffffefff},
1656         {0x00000091, 0xfff3efff},
1657         {0x00000092, 0xfff3efbf},
1658         {0x00000093, 0xf7ffffff},
1659         {0x00000094, 0xffffff7f},
1660         {0x00000095, 0x00000fff},
1661         {0x00000096, 0x00116fff},
1662         {0x00000097, 0x60010000},
1663         {0x00000098, 0x10010000},
1664         {0x0000009f, 0x00c79000}
1665 };
1666
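/*
 * Each row in the two tables above is an {index, data} pair:
 * ci_mc_load_microcode() below writes the first word to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA before
 * streaming the MC ucode itself into MC_SEQ_SUP_PGM.
 */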
1667
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active register instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682                             u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685                              MEID(me & 0x3) |
1686                              VMID(vmid & 0xf) |
1687                              QUEUEID(queue & 0x7));
1688         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
1690
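/*
 * Typical usage pattern (illustrative sketch, assuming the caller holds
 * rdev->srbm_mutex as is done elsewhere in this driver): select the
 * instance, program the instanced registers, then switch back to the
 * default so later accesses are not silently redirected, e.g.
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program CP_HQD_xxx / SH_MEM_xxx style registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */
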
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 static int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702         const __be32 *fw_data;
1703         u32 running, blackout = 0;
1704         u32 *io_mc_regs;
1705         int i, ucode_size, regs_size;
1706
1707         if (!rdev->mc_fw)
1708                 return -EINVAL;
1709
1710         switch (rdev->family) {
1711         case CHIP_BONAIRE:
1712                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713                 ucode_size = CIK_MC_UCODE_SIZE;
1714                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715                 break;
1716         case CHIP_HAWAII:
1717                 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718                 ucode_size = HAWAII_MC_UCODE_SIZE;
1719                 regs_size = HAWAII_IO_MC_REGS_SIZE;
1720                 break;
1721         default:
1722                 return -EINVAL;
1723         }
1724
1725         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726
1727         if (running == 0) {
1728                 if (running) {
1729                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731                 }
1732
1733                 /* reset the engine and set to writable */
1734                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736
1737                 /* load mc io regs */
1738                 for (i = 0; i < regs_size; i++) {
1739                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741                 }
1742                 /* load the MC ucode */
1743                 fw_data = (const __be32 *)rdev->mc_fw->data;
1744                 for (i = 0; i < ucode_size; i++)
1745                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746
1747                 /* put the engine back into the active state */
1748                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751
1752                 /* wait for training to complete */
1753                 for (i = 0; i < rdev->usec_timeout; i++) {
1754                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755                                 break;
1756                         udelay(1);
1757                 }
1758                 for (i = 0; i < rdev->usec_timeout; i++) {
1759                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760                                 break;
1761                         udelay(1);
1762                 }
1763
1764                 if (running)
1765                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766         }
1767
1768         return 0;
1769 }
1770
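/*
 * Only the dGPU families (Bonaire, Hawaii) are handled above; the APUs
 * (Kaveri, Kabini) ship no MC firmware, so the startup path is expected
 * to call this only when RADEON_IS_IGP is not set rather than rely on
 * the -EINVAL default.
 */
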
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782         const char *chip_name;
1783         size_t pfp_req_size, me_req_size, ce_req_size,
1784                 mec_req_size, rlc_req_size, mc_req_size = 0,
1785                 sdma_req_size, smc_req_size = 0;
1786         char fw_name[30];
1787         int err;
1788
1789         DRM_DEBUG("\n");
1790
1791         switch (rdev->family) {
1792         case CHIP_BONAIRE:
1793                 chip_name = "BONAIRE";
1794                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802                 break;
1803         case CHIP_HAWAII:
1804                 chip_name = "HAWAII";
1805                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813                 break;
1814         case CHIP_KAVERI:
1815                 chip_name = "KAVERI";
1816                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822                 break;
1823         case CHIP_KABINI:
1824                 chip_name = "KABINI";
1825                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831                 break;
1832         default: BUG();
1833         }
1834
1835         DRM_INFO("Loading %s Microcode\n", chip_name);
1836
1837         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839         if (err)
1840                 goto out;
1841         if (rdev->pfp_fw->size != pfp_req_size) {
1842                 printk(KERN_ERR
1843                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844                        rdev->pfp_fw->size, fw_name);
1845                 err = -EINVAL;
1846                 goto out;
1847         }
1848
1849         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851         if (err)
1852                 goto out;
1853         if (rdev->me_fw->size != me_req_size) {
1854                 printk(KERN_ERR
1855                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856                        rdev->me_fw->size, fw_name);
1857                 err = -EINVAL;
                     goto out;
1858         }
1859
1860         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862         if (err)
1863                 goto out;
1864         if (rdev->ce_fw->size != ce_req_size) {
1865                 printk(KERN_ERR
1866                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867                        rdev->ce_fw->size, fw_name);
1868                 err = -EINVAL;
                     goto out;
1869         }
1870
1871         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873         if (err)
1874                 goto out;
1875         if (rdev->mec_fw->size != mec_req_size) {
1876                 printk(KERN_ERR
1877                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878                        rdev->mec_fw->size, fw_name);
1879                 err = -EINVAL;
                     goto out;
1880         }
1881
1882         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884         if (err)
1885                 goto out;
1886         if (rdev->rlc_fw->size != rlc_req_size) {
1887                 printk(KERN_ERR
1888                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889                        rdev->rlc_fw->size, fw_name);
1890                 err = -EINVAL;
                     goto out;
1891         }
1892
1893         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895         if (err)
1896                 goto out;
1897         if (rdev->sdma_fw->size != sdma_req_size) {
1898                 printk(KERN_ERR
1899                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900                        rdev->sdma_fw->size, fw_name);
1901                 err = -EINVAL;
                     goto out;
1902         }
1903
1904         /* No MC or SMC ucode on APUs */
1905         if (!(rdev->flags & RADEON_IS_IGP)) {
1906                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908                 if (err)
1909                         goto out;
1910                 if (rdev->mc_fw->size != mc_req_size) {
1911                         printk(KERN_ERR
1912                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913                                rdev->mc_fw->size, fw_name);
1914                                err = -EINVAL;
                                    goto out;
1915                 }
1916
1917                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919                 if (err) {
1920                         printk(KERN_ERR
1921                                "smc: error loading firmware \"%s\"\n",
1922                                fw_name);
1923                         release_firmware(rdev->smc_fw);
1924                         rdev->smc_fw = NULL;
1925                         err = 0;
1926                 } else if (rdev->smc_fw->size != smc_req_size) {
1927                         printk(KERN_ERR
1928                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929                                rdev->smc_fw->size, fw_name);
1930                         err = -EINVAL;
1931                 }
1932         }
1933
1934 out:
1935         if (err) {
1936                 if (err != -EINVAL)
1937                         printk(KERN_ERR
1938                                "cik_cp: Failed to load firmware \"%s\"\n",
1939                                fw_name);
1940                 release_firmware(rdev->pfp_fw);
1941                 rdev->pfp_fw = NULL;
1942                 release_firmware(rdev->me_fw);
1943                 rdev->me_fw = NULL;
1944                 release_firmware(rdev->ce_fw);
1945                 rdev->ce_fw = NULL;
                     release_firmware(rdev->mec_fw);
                     rdev->mec_fw = NULL;
1946                 release_firmware(rdev->rlc_fw);
1947                 rdev->rlc_fw = NULL;
                     release_firmware(rdev->sdma_fw);
                     rdev->sdma_fw = NULL;
1948                 release_firmware(rdev->mc_fw);
1949                 rdev->mc_fw = NULL;
1950                 release_firmware(rdev->smc_fw);
1951                 rdev->smc_fw = NULL;
1952         }
1953         return err;
1954 }
1955
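/*
 * Note on the error handling above: every image except the SMC one is
 * mandatory.  A failed or short mandatory image releases everything that
 * was already requested and propagates the error, while a missing SMC
 * image is tolerated (err is reset to 0) and simply leaves rdev->smc_fw
 * NULL.
 */
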
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972         const u32 num_tile_mode_states = 32;
1973         const u32 num_secondary_tile_mode_states = 16;
1974         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975         u32 num_pipe_configs;
1976         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977                 rdev->config.cik.max_shader_engines;
1978
1979         switch (rdev->config.cik.mem_row_size_in_kb) {
1980         case 1:
1981                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982                 break;
1983         case 2:
1984         default:
1985                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986                 break;
1987         case 4:
1988                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989                 break;
1990         }
1991
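        /* Anything reporting more than 8 tile pipes (Hawaii) is programmed
         * with the 16-pipe (P16) table; the smaller parts use the 8-, 4- or
         * 2-pipe tables in the branches that follow.
         */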
1992         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993         if (num_pipe_configs > 8)
1994                 num_pipe_configs = 16;
1995
1996         if (num_pipe_configs == 16) {
1997                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998                         switch (reg_offset) {
1999                         case 0:
2000                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004                                 break;
2005                         case 1:
2006                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010                                 break;
2011                         case 2:
2012                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016                                 break;
2017                         case 3:
2018                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022                                 break;
2023                         case 4:
2024                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                                  TILE_SPLIT(split_equal_to_row_size));
2028                                 break;
2029                         case 5:
2030                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032                                 break;
2033                         case 6:
2034                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038                                 break;
2039                         case 7:
2040                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043                                                  TILE_SPLIT(split_equal_to_row_size));
2044                                 break;
2045                         case 8:
2046                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048                                 break;
2049                         case 9:
2050                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052                                 break;
2053                         case 10:
2054                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058                                 break;
2059                         case 11:
2060                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064                                 break;
2065                         case 12:
2066                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070                                 break;
2071                         case 13:
2072                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074                                 break;
2075                         case 14:
2076                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                                 break;
2081                         case 16:
2082                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                                 break;
2087                         case 17:
2088                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092                                 break;
2093                         case 27:
2094                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096                                 break;
2097                         case 28:
2098                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102                                 break;
2103                         case 29:
2104                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108                                 break;
2109                         case 30:
2110                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114                                 break;
2115                         default:
2116                                 gb_tile_moden = 0;
2117                                 break;
2118                         }
2119                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121                 }
2122                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123                         switch (reg_offset) {
2124                         case 0:
2125                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2129                                 break;
2130                         case 1:
2131                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2135                                 break;
2136                         case 2:
2137                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2141                                 break;
2142                         case 3:
2143                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2147                                 break;
2148                         case 4:
2149                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2153                                 break;
2154                         case 5:
2155                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2159                                 break;
2160                         case 6:
2161                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2165                                 break;
2166                         case 8:
2167                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2171                                 break;
2172                         case 9:
2173                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2177                                 break;
2178                         case 10:
2179                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                                 break;
2184                         case 11:
2185                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                                 break;
2190                         case 12:
2191                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2195                                 break;
2196                         case 13:
2197                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2201                                 break;
2202                         case 14:
2203                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2207                                 break;
2208                         default:
2209                                 gb_tile_moden = 0;
2210                                 break;
2211                         }
                             rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2212                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213                 }
2214         } else if (num_pipe_configs == 8) {
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216                         switch (reg_offset) {
2217                         case 0:
2218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222                                 break;
2223                         case 1:
2224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228                                 break;
2229                         case 2:
2230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234                                 break;
2235                         case 3:
2236                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240                                 break;
2241                         case 4:
2242                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245                                                  TILE_SPLIT(split_equal_to_row_size));
2246                                 break;
2247                         case 5:
2248                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                                 break;
2251                         case 6:
2252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256                                 break;
2257                         case 7:
2258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261                                                  TILE_SPLIT(split_equal_to_row_size));
2262                                 break;
2263                         case 8:
2264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266                                 break;
2267                         case 9:
2268                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270                                 break;
2271                         case 10:
2272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                                 break;
2277                         case 11:
2278                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                                 break;
2283                         case 12:
2284                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                                 break;
2289                         case 13:
2290                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292                                 break;
2293                         case 14:
2294                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298                                 break;
2299                         case 16:
2300                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                                 break;
2305                         case 17:
2306                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310                                 break;
2311                         case 27:
2312                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314                                 break;
2315                         case 28:
2316                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                                 break;
2321                         case 29:
2322                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                                 break;
2327                         case 30:
2328                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332                                 break;
2333                         default:
2334                                 gb_tile_moden = 0;
2335                                 break;
2336                         }
2337                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339                 }
2340                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341                         switch (reg_offset) {
2342                         case 0:
2343                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2347                                 break;
2348                         case 1:
2349                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                                 break;
2354                         case 2:
2355                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2359                                 break;
2360                         case 3:
2361                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2365                                 break;
2366                         case 4:
2367                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                                 break;
2372                         case 5:
2373                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2377                                 break;
2378                         case 6:
2379                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2383                                 break;
2384                         case 8:
2385                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                                 break;
2390                         case 9:
2391                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2395                                 break;
2396                         case 10:
2397                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2401                                 break;
2402                         case 11:
2403                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2407                                 break;
2408                         case 12:
2409                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2413                                 break;
2414                         case 13:
2415                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2419                                 break;
2420                         case 14:
2421                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2425                                 break;
2426                         default:
2427                                 gb_tile_moden = 0;
2428                                 break;
2429                         }
2430                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432                 }
2433         } else if (num_pipe_configs == 4) {
2434                 if (num_rbs == 4) {
2435                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436                                 switch (reg_offset) {
2437                                 case 0:
2438                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442                                         break;
2443                                 case 1:
2444                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448                                         break;
2449                                 case 2:
2450                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454                                         break;
2455                                 case 3:
2456                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460                                         break;
2461                                 case 4:
2462                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                                          TILE_SPLIT(split_equal_to_row_size));
2466                                         break;
2467                                 case 5:
2468                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470                                         break;
2471                                 case 6:
2472                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476                                         break;
2477                                 case 7:
2478                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                                          TILE_SPLIT(split_equal_to_row_size));
2482                                         break;
2483                                 case 8:
2484                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486                                         break;
2487                                 case 9:
2488                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490                                         break;
2491                                 case 10:
2492                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496                                         break;
2497                                 case 11:
2498                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                                         break;
2503                                 case 12:
2504                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508                                         break;
2509                                 case 13:
2510                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512                                         break;
2513                                 case 14:
2514                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518                                         break;
2519                                 case 16:
2520                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                                         break;
2525                                 case 17:
2526                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530                                         break;
2531                                 case 27:
2532                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534                                         break;
2535                                 case 28:
2536                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540                                         break;
2541                                 case 29:
2542                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                                         break;
2547                                 case 30:
2548                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                                         break;
2553                                 default:
2554                                         gb_tile_moden = 0;
2555                                         break;
2556                                 }
2557                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559                         }
2560                 } else if (num_rbs < 4) {
2561                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562                                 switch (reg_offset) {
2563                                 case 0:
2564                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568                                         break;
2569                                 case 1:
2570                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574                                         break;
2575                                 case 2:
2576                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580                                         break;
2581                                 case 3:
2582                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586                                         break;
2587                                 case 4:
2588                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591                                                          TILE_SPLIT(split_equal_to_row_size));
2592                                         break;
2593                                 case 5:
2594                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                                         break;
2597                                 case 6:
2598                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602                                         break;
2603                                 case 7:
2604                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607                                                          TILE_SPLIT(split_equal_to_row_size));
2608                                         break;
2609                                 case 8:
2610                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612                                         break;
2613                                 case 9:
2614                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616                                         break;
2617                                 case 10:
2618                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                                         break;
2623                                 case 11:
2624                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628                                         break;
2629                                 case 12:
2630                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                                         break;
2635                                 case 13:
2636                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638                                         break;
2639                                 case 14:
2640                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                         break;
2645                                 case 16:
2646                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                         break;
2651                                 case 17:
2652                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                         break;
2657                                 case 27:
2658                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660                                         break;
2661                                 case 28:
2662                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666                                         break;
2667                                 case 29:
2668                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                                         break;
2673                                 case 30:
2674                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                         break;
2679                                 default:
2680                                         gb_tile_moden = 0;
2681                                         break;
2682                                 }
2683                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685                         }
2686                 }
2687                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688                         switch (reg_offset) {
2689                         case 0:
2690                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2694                                 break;
2695                         case 1:
2696                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2700                                 break;
2701                         case 2:
2702                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2706                                 break;
2707                         case 3:
2708                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2712                                 break;
2713                         case 4:
2714                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2718                                 break;
2719                         case 5:
2720                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2724                                 break;
2725                         case 6:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2730                                 break;
2731                         case 8:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 9:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 10:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 11:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2754                                 break;
2755                         case 12:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2760                                 break;
2761                         case 13:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2766                                 break;
2767                         case 14:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2772                                 break;
2773                         default:
2774                                 gb_tile_moden = 0;
2775                                 break;
2776                         }
2777                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779                 }
2780         } else if (num_pipe_configs == 2) {
2781                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782                         switch (reg_offset) {
2783                         case 0:
2784                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2787                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788                                 break;
2789                         case 1:
2790                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794                                 break;
2795                         case 2:
2796                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2799                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800                                 break;
2801                         case 3:
2802                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806                                 break;
2807                         case 4:
2808                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2811                                                  TILE_SPLIT(split_equal_to_row_size));
2812                                 break;
2813                         case 5:
2814                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816                                 break;
2817                         case 6:
2818                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822                                 break;
2823                         case 7:
2824                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2827                                                  TILE_SPLIT(split_equal_to_row_size));
2828                                 break;
2829                         case 8:
2830                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831                                 break;
2832                         case 9:
2833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835                                 break;
2836                         case 10:
2837                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2840                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841                                 break;
2842                         case 11:
2843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                                 break;
2848                         case 12:
2849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                                 break;
2854                         case 13:
2855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857                                 break;
2858                         case 14:
2859                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2862                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                                 break;
2864                         case 16:
2865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869                                 break;
2870                         case 17:
2871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875                                 break;
2876                         case 27:
2877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879                                 break;
2880                         case 28:
2881                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2884                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                 break;
2886                         case 29:
2887                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2890                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                 break;
2892                         case 30:
2893                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2896                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897                                 break;
2898                         default:
2899                                 gb_tile_moden = 0;
2900                                 break;
2901                         }
2902                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904                 }
2905                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906                         switch (reg_offset) {
2907                         case 0:
2908                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                                 break;
2913                         case 1:
2914                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2918                                 break;
2919                         case 2:
2920                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2924                                 break;
2925                         case 3:
2926                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2930                                 break;
2931                         case 4:
2932                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2936                                 break;
2937                         case 5:
2938                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2942                                 break;
2943                         case 6:
2944                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2948                                 break;
2949                         case 8:
2950                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2954                                 break;
2955                         case 9:
2956                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2960                                 break;
2961                         case 10:
2962                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2966                                 break;
2967                         case 11:
2968                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2972                                 break;
2973                         case 12:
2974                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2978                                 break;
2979                         case 13:
2980                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2984                                 break;
2985                         case 14:
2986                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2990                                 break;
2991                         default:
2992                                 gb_tile_moden = 0;
2993                                 break;
2994                         }
2995                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                 }
2998         } else
2999                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
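/*
 * Descriptive note: in addition to programming the GB_TILE_MODEn and
 * GB_MACROTILE_MODEn registers, the loops above cache every value in
 * rdev->config.cik.tile_mode_array[] and macrotile_mode_array[], so the
 * tiling setup can be consulted later without reading the registers back.
 */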
3001
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014                              u32 se_num, u32 sh_num)
3015 {
3016         u32 data = INSTANCE_BROADCAST_WRITES;
3017
3018         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020         else if (se_num == 0xffffffff)
3021                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022         else if (sh_num == 0xffffffff)
3023                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024         else
3025                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026         WREG32(GRBM_GFX_INDEX, data);
3027 }
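/*
 * Illustrative usage (not from the original source); 0xffffffff in either
 * argument requests broadcast writes for that field:
 *
 *   cik_select_se_sh(rdev, 1, 0xffffffff);           - SE 1, all SH blocks
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);  - back to full broadcast
 */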
3028
3029 /**
3030  * cik_create_bitmask - create a bitmask
3031  *
3032  * @bit_width: length of the mask
3033  *
3034  * Create a variable-length bit mask (CIK).
3035  * Returns the bitmask.
3036  */
3037 static u32 cik_create_bitmask(u32 bit_width)
3038 {
3039         u32 i, mask = 0;
3040
3041         for (i = 0; i < bit_width; i++) {
3042                 mask <<= 1;
3043                 mask |= 1;
3044         }
3045         return mask;
3046 }
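/*
 * Example (illustrative): cik_create_bitmask(4) returns 0xf.  The loop is
 * equivalent to (1U << bit_width) - 1, except that it also behaves correctly
 * for bit_width == 32, where the shift expression would be undefined.
 */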
3047
3048 /**
3049  * cik_get_rb_disabled - get the disabled render backend (RB) bitmask
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3053  * @sh_per_se: number of SH blocks per SE for the asic
3054  *
3055  * Reads the RB backend disable registers and calculates the
3056  * bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060                               u32 max_rb_num_per_se,
3061                               u32 sh_per_se)
3062 {
3063         u32 data, mask;
3064
3065         data = RREG32(CC_RB_BACKEND_DISABLE);
3066         if (data & 1)
3067                 data &= BACKEND_DISABLE_MASK;
3068         else
3069                 data = 0;
3070         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071
3072         data >>= BACKEND_DISABLE_SHIFT;
3073
3074         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075
3076         return data & mask;
3077 }
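/*
 * Descriptive note: bit 0 of CC_RB_BACKEND_DISABLE is treated as a valid flag
 * above; when it is clear, that register's contents are ignored.  The result
 * is OR'ed with GC_USER_RB_BACKEND_DISABLE and masked down to the RBs owned
 * by a single SH (max_rb_num_per_se / sh_per_se bits).
 */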
3078
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090                          u32 se_num, u32 sh_per_se,
3091                          u32 max_rb_num_per_se)
3092 {
3093         int i, j;
3094         u32 data, mask;
3095         u32 disabled_rbs = 0;
3096         u32 enabled_rbs = 0;
3097
3098         for (i = 0; i < se_num; i++) {
3099                 for (j = 0; j < sh_per_se; j++) {
3100                         cik_select_se_sh(rdev, i, j);
3101                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102                         if (rdev->family == CHIP_HAWAII)
3103                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104                         else
3105                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106                 }
3107         }
3108         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109
3110         mask = 1;
3111         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112                 if (!(disabled_rbs & mask))
3113                         enabled_rbs |= mask;
3114                 mask <<= 1;
3115         }
3116
3117         rdev->config.cik.backend_enable_mask = enabled_rbs;
3118
3119         for (i = 0; i < se_num; i++) {
3120                 cik_select_se_sh(rdev, i, 0xffffffff);
3121                 data = 0;
3122                 for (j = 0; j < sh_per_se; j++) {
3123                         switch (enabled_rbs & 3) {
3124                         case 0:
3125                                 if (j == 0)
3126                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127                                 else
3128                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129                                 break;
3130                         case 1:
3131                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132                                 break;
3133                         case 2:
3134                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135                                 break;
3136                         case 3:
3137                         default:
3138                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139                                 break;
3140                         }
3141                         enabled_rbs >>= 2;
3142                 }
3143                 WREG32(PA_SC_RASTER_CONFIG, data);
3144         }
3145         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
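/*
 * Illustrative example (assumed configuration, not from the source): with
 * 2 SEs, 1 SH per SE and 2 RBs per SE and nothing harvested, disabled_rbs is
 * 0 and enabled_rbs becomes 0xf, so each SE goes through the "case 3" default
 * path above; when only one RB of a pair is usable, the "case 1" or "case 2"
 * path selects RASTER_CONFIG_RB_MAP_0 or RASTER_CONFIG_RB_MAP_3 for that pair
 * instead.
 */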
3147
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159         u32 mc_shared_chmap, mc_arb_ramcfg;
3160         u32 hdp_host_path_cntl;
3161         u32 tmp;
3162         int i, j;
3163
3164         switch (rdev->family) {
3165         case CHIP_BONAIRE:
3166                 rdev->config.cik.max_shader_engines = 2;
3167                 rdev->config.cik.max_tile_pipes = 4;
3168                 rdev->config.cik.max_cu_per_sh = 7;
3169                 rdev->config.cik.max_sh_per_se = 1;
3170                 rdev->config.cik.max_backends_per_se = 2;
3171                 rdev->config.cik.max_texture_channel_caches = 4;
3172                 rdev->config.cik.max_gprs = 256;
3173                 rdev->config.cik.max_gs_threads = 32;
3174                 rdev->config.cik.max_hw_contexts = 8;
3175
3176                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181                 break;
3182         case CHIP_HAWAII:
3183                 rdev->config.cik.max_shader_engines = 4;
3184                 rdev->config.cik.max_tile_pipes = 16;
3185                 rdev->config.cik.max_cu_per_sh = 11;
3186                 rdev->config.cik.max_sh_per_se = 1;
3187                 rdev->config.cik.max_backends_per_se = 4;
3188                 rdev->config.cik.max_texture_channel_caches = 16;
3189                 rdev->config.cik.max_gprs = 256;
3190                 rdev->config.cik.max_gs_threads = 32;
3191                 rdev->config.cik.max_hw_contexts = 8;
3192
3193                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198                 break;
3199         case CHIP_KAVERI:
3200                 rdev->config.cik.max_shader_engines = 1;
3201                 rdev->config.cik.max_tile_pipes = 4;
3202                 if ((rdev->pdev->device == 0x1304) ||
3203                     (rdev->pdev->device == 0x1305) ||
3204                     (rdev->pdev->device == 0x130C) ||
3205                     (rdev->pdev->device == 0x130F) ||
3206                     (rdev->pdev->device == 0x1310) ||
3207                     (rdev->pdev->device == 0x1311) ||
3208                     (rdev->pdev->device == 0x131C)) {
3209                         rdev->config.cik.max_cu_per_sh = 8;
3210                         rdev->config.cik.max_backends_per_se = 2;
3211                 } else if ((rdev->pdev->device == 0x1309) ||
3212                            (rdev->pdev->device == 0x130A) ||
3213                            (rdev->pdev->device == 0x130D) ||
3214                            (rdev->pdev->device == 0x1313) ||
3215                            (rdev->pdev->device == 0x131D)) {
3216                         rdev->config.cik.max_cu_per_sh = 6;
3217                         rdev->config.cik.max_backends_per_se = 2;
3218                 } else if ((rdev->pdev->device == 0x1306) ||
3219                            (rdev->pdev->device == 0x1307) ||
3220                            (rdev->pdev->device == 0x130B) ||
3221                            (rdev->pdev->device == 0x130E) ||
3222                            (rdev->pdev->device == 0x1315) ||
3223                            (rdev->pdev->device == 0x131B)) {
3224                         rdev->config.cik.max_cu_per_sh = 4;
3225                         rdev->config.cik.max_backends_per_se = 1;
3226                 } else {
3227                         rdev->config.cik.max_cu_per_sh = 3;
3228                         rdev->config.cik.max_backends_per_se = 1;
3229                 }
3230                 rdev->config.cik.max_sh_per_se = 1;
3231                 rdev->config.cik.max_texture_channel_caches = 4;
3232                 rdev->config.cik.max_gprs = 256;
3233                 rdev->config.cik.max_gs_threads = 16;
3234                 rdev->config.cik.max_hw_contexts = 8;
3235
3236                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241                 break;
3242         case CHIP_KABINI:
3243         default:
3244                 rdev->config.cik.max_shader_engines = 1;
3245                 rdev->config.cik.max_tile_pipes = 2;
3246                 rdev->config.cik.max_cu_per_sh = 2;
3247                 rdev->config.cik.max_sh_per_se = 1;
3248                 rdev->config.cik.max_backends_per_se = 1;
3249                 rdev->config.cik.max_texture_channel_caches = 2;
3250                 rdev->config.cik.max_gprs = 256;
3251                 rdev->config.cik.max_gs_threads = 16;
3252                 rdev->config.cik.max_hw_contexts = 8;
3253
3254                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259                 break;
3260         }
3261
3262         /* Initialize HDP */
3263         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264                 WREG32((0x2c14 + j), 0x00000000);
3265                 WREG32((0x2c18 + j), 0x00000000);
3266                 WREG32((0x2c1c + j), 0x00000000);
3267                 WREG32((0x2c20 + j), 0x00000000);
3268                 WREG32((0x2c24 + j), 0x00000000);
3269         }
3270
3271         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272
3273         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274
3275         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277
3278         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279         rdev->config.cik.mem_max_burst_length_bytes = 256;
3280         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282         if (rdev->config.cik.mem_row_size_in_kb > 4)
3283                 rdev->config.cik.mem_row_size_in_kb = 4;
3284         /* XXX use MC settings? */
3285         rdev->config.cik.shader_engine_tile_size = 32;
3286         rdev->config.cik.num_gpus = 1;
3287         rdev->config.cik.multi_gpu_tile_size = 64;
3288
3289         /* fix up row size */
3290         gb_addr_config &= ~ROW_SIZE_MASK;
3291         switch (rdev->config.cik.mem_row_size_in_kb) {
3292         case 1:
3293         default:
3294                 gb_addr_config |= ROW_SIZE(0);
3295                 break;
3296         case 2:
3297                 gb_addr_config |= ROW_SIZE(1);
3298                 break;
3299         case 4:
3300                 gb_addr_config |= ROW_SIZE(2);
3301                 break;
3302         }
3303
3304         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3305          * not have bank info, so create a custom tiling dword.
3306          * bits 3:0   num_pipes
3307          * bits 7:4   num_banks
3308          * bits 11:8  group_size
3309          * bits 15:12 row_size
3310          */
3311         rdev->config.cik.tile_config = 0;
3312         switch (rdev->config.cik.num_tile_pipes) {
3313         case 1:
3314                 rdev->config.cik.tile_config |= (0 << 0);
3315                 break;
3316         case 2:
3317                 rdev->config.cik.tile_config |= (1 << 0);
3318                 break;
3319         case 4:
3320                 rdev->config.cik.tile_config |= (2 << 0);
3321                 break;
3322         case 8:
3323         default:
3324                 /* XXX what about 12? */
3325                 rdev->config.cik.tile_config |= (3 << 0);
3326                 break;
3327         }
3328         rdev->config.cik.tile_config |=
3329                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330         rdev->config.cik.tile_config |=
3331                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332         rdev->config.cik.tile_config |=
3333                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
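        /*
         * Illustrative example (hypothetical field values): a 4-pipe part puts
         * 2 in bits 3:0, while the raw NOOFBANK, PIPE_INTERLEAVE_SIZE and
         * ROW_SIZE register fields are copied unmodified into bits 7:4, 11:8
         * and 15:12; e.g. NOOFBANK == 2 and ROW_SIZE == 1 would yield
         * tile_config == (1 << 12) | (2 << 4) | 2 plus the interleave bits.
         */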
3334
3335         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343
3344         cik_tiling_mode_table_init(rdev);
3345
3346         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347                      rdev->config.cik.max_sh_per_se,
3348                      rdev->config.cik.max_backends_per_se);
3349
3350         /* set HW defaults for 3D engine */
3351         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352
3353         WREG32(SX_DEBUG_1, 0x20);
3354
3355         WREG32(TA_CNTL_AUX, 0x00010000);
3356
3357         tmp = RREG32(SPI_CONFIG_CNTL);
3358         tmp |= 0x03000000;
3359         WREG32(SPI_CONFIG_CNTL, tmp);
3360
3361         WREG32(SQ_CONFIG, 1);
3362
3363         WREG32(DB_DEBUG, 0);
3364
3365         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366         tmp |= 0x00000400;
3367         WREG32(DB_DEBUG2, tmp);
3368
3369         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370         tmp |= 0x00020200;
3371         WREG32(DB_DEBUG3, tmp);
3372
3373         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374         tmp |= 0x00018208;
3375         WREG32(CB_HW_CONTROL, tmp);
3376
3377         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378
3379         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383
3384         WREG32(VGT_NUM_INSTANCES, 1);
3385
3386         WREG32(CP_PERFMON_CNTL, 0);
3387
3388         WREG32(SQ_CONFIG, 0);
3389
3390         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391                                           FORCE_EOV_MAX_REZ_CNT(255)));
3392
3393         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395
3396         WREG32(VGT_GS_VERTEX_REUSE, 16);
3397         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398
3399         tmp = RREG32(HDP_MISC_CNTL);
3400         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401         WREG32(HDP_MISC_CNTL, tmp);
3402
3403         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405
3406         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408
3409         udelay(50);
3410 }
3411
3412 /*
3413  * GPU scratch registers helpers function.
3414  */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy interface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427         int i;
3428
3429         rdev->scratch.num_reg = 7;
3430         rdev->scratch.reg_base = SCRATCH_REG0;
3431         for (i = 0; i < rdev->scratch.num_reg; i++) {
3432                 rdev->scratch.free[i] = true;
3433                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434         }
3435 }
3436
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume().
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450         uint32_t scratch;
3451         uint32_t tmp = 0;
3452         unsigned i;
3453         int r;
3454
3455         r = radeon_scratch_get(rdev, &scratch);
3456         if (r) {
3457                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458                 return r;
3459         }
3460         WREG32(scratch, 0xCAFEDEAD);
3461         r = radeon_ring_lock(rdev, ring, 3);
3462         if (r) {
3463                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464                 radeon_scratch_free(rdev, scratch);
3465                 return r;
3466         }
3467         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469         radeon_ring_write(ring, 0xDEADBEEF);
3470         radeon_ring_unlock_commit(rdev, ring);
3471
3472         for (i = 0; i < rdev->usec_timeout; i++) {
3473                 tmp = RREG32(scratch);
3474                 if (tmp == 0xDEADBEEF)
3475                         break;
3476                 DRM_UDELAY(1);
3477         }
3478         if (i < rdev->usec_timeout) {
3479                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3480         } else {
3481                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482                           ring->idx, scratch, tmp);
3483                 r = -EINVAL;
3484         }
3485         radeon_scratch_free(rdev, scratch);
3486         return r;
3487 }
3488
3489 /**
3490  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3491  *
3492  * @rdev: radeon_device pointer
3493  * @fence: radeon fence object
3494  *
3495  * Emits a fence sequence number on the gfx ring and flushes
3496  * GPU caches.
3497  */
3498 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3499                              struct radeon_fence *fence)
3500 {
3501         struct radeon_ring *ring = &rdev->ring[fence->ring];
3502         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3503
3504         /* EVENT_WRITE_EOP - flush caches, send int */
3505         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3506         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3507                                  EOP_TC_ACTION_EN |
3508                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3509                                  EVENT_INDEX(5)));
3510         radeon_ring_write(ring, addr & 0xfffffffc);
3511         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3512         radeon_ring_write(ring, fence->seq);
3513         radeon_ring_write(ring, 0);
3514         /* HDP flush */
3515         /* We should be using the new WAIT_REG_MEM special op packet here
3516          * but it causes the CP to hang
3517          */
3518         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3519         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3520                                  WRITE_DATA_DST_SEL(0)));
3521         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3522         radeon_ring_write(ring, 0);
3523         radeon_ring_write(ring, 0);
3524 }
3525
3526 /**
3527  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3528  *
3529  * @rdev: radeon_device pointer
3530  * @fence: radeon fence object
3531  *
3532  * Emits a fence sequence number on the compute ring and flushes
3533  * GPU caches.
3534  */
3535 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3536                                  struct radeon_fence *fence)
3537 {
3538         struct radeon_ring *ring = &rdev->ring[fence->ring];
3539         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3540
3541         /* RELEASE_MEM - flush caches, send int */
3542         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3543         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3544                                  EOP_TC_ACTION_EN |
3545                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3546                                  EVENT_INDEX(5)));
3547         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3548         radeon_ring_write(ring, addr & 0xfffffffc);
3549         radeon_ring_write(ring, upper_32_bits(addr));
3550         radeon_ring_write(ring, fence->seq);
3551         radeon_ring_write(ring, 0);
3552         /* HDP flush */
3553         /* We should be using the new WAIT_REG_MEM special op packet here
3554          * but it causes the CP to hang
3555          */
3556         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3557         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3558                                  WRITE_DATA_DST_SEL(0)));
3559         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3560         radeon_ring_write(ring, 0);
3561         radeon_ring_write(ring, 0);
3562 }
3563
3564 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3565                              struct radeon_ring *ring,
3566                              struct radeon_semaphore *semaphore,
3567                              bool emit_wait)
3568 {
3569 /* TODO: figure out why semaphores cause lockups */
3570 #if 0
3571         uint64_t addr = semaphore->gpu_addr;
3572         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3573
3574         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3575         radeon_ring_write(ring, addr & 0xffffffff);
3576         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3577
3578         return true;
3579 #else
3580         return false;
3581 #endif
3582 }
3583
3584 /**
3585  * cik_copy_cpdma - copy pages using the CP DMA engine
3586  *
3587  * @rdev: radeon_device pointer
3588  * @src_offset: src GPU address
3589  * @dst_offset: dst GPU address
3590  * @num_gpu_pages: number of GPU pages to xfer
3591  * @fence: radeon fence object
3592  *
3593  * Copy GPU pages using the CP DMA engine (CIK+).
3594  * Used by the radeon ttm implementation to move pages if
3595  * registered as the asic copy callback.
3596  */
3597 int cik_copy_cpdma(struct radeon_device *rdev,
3598                    uint64_t src_offset, uint64_t dst_offset,
3599                    unsigned num_gpu_pages,
3600                    struct radeon_fence **fence)
3601 {
3602         struct radeon_semaphore *sem = NULL;
3603         int ring_index = rdev->asic->copy.blit_ring_index;
3604         struct radeon_ring *ring = &rdev->ring[ring_index];
3605         u32 size_in_bytes, cur_size_in_bytes, control;
3606         int i, num_loops;
3607         int r = 0;
3608
3609         r = radeon_semaphore_create(rdev, &sem);
3610         if (r) {
3611                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3612                 return r;
3613         }
3614
3615         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3616         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3617         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3618         if (r) {
3619                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3620                 radeon_semaphore_free(rdev, &sem, NULL);
3621                 return r;
3622         }
3623
3624         radeon_semaphore_sync_to(sem, *fence);
3625         radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3626
3627         for (i = 0; i < num_loops; i++) {
3628                 cur_size_in_bytes = size_in_bytes;
3629                 if (cur_size_in_bytes > 0x1fffff)
3630                         cur_size_in_bytes = 0x1fffff;
3631                 size_in_bytes -= cur_size_in_bytes;
3632                 control = 0;
3633                 if (size_in_bytes == 0)
3634                         control |= PACKET3_DMA_DATA_CP_SYNC;
3635                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3636                 radeon_ring_write(ring, control);
3637                 radeon_ring_write(ring, lower_32_bits(src_offset));
3638                 radeon_ring_write(ring, upper_32_bits(src_offset));
3639                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3640                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3641                 radeon_ring_write(ring, cur_size_in_bytes);
3642                 src_offset += cur_size_in_bytes;
3643                 dst_offset += cur_size_in_bytes;
3644         }
3645
3646         r = radeon_fence_emit(rdev, fence, ring->idx);
3647         if (r) {
3648                 radeon_ring_unlock_undo(rdev, ring);
3649                 return r;
3650         }
3651
3652         radeon_ring_unlock_commit(rdev, ring);
3653         radeon_semaphore_free(rdev, &sem, *fence);
3654
3655         return r;
3656 }
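/*
 * Worked example of the chunking above (informational sketch, assuming the
 * driver's 4 KiB GPU page size): copying 1024 pages gives size_in_bytes =
 * 1024 * 4096 = 4194304 bytes.  With the 0x1fffff (2097151) byte limit per
 * DMA_DATA packet, num_loops = DIV_ROUND_UP(4194304, 2097151) = 3, and each
 * loop emits one 7-dword packet, which is why the ring is locked for
 * num_loops * 7 + 18 dwords.
 */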
3657
3658 /*
3659  * IB stuff
3660  */
3661 /**
3662  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3663  *
3664  * @rdev: radeon_device pointer
3665  * @ib: radeon indirect buffer object
3666  *
3667  * Emits a DE (drawing engine) or CE (constant engine) IB
3668  * on the gfx ring.  IBs are usually generated by userspace
3669  * acceleration drivers and submitted to the kernel for
3670  * scheduling on the ring.  This function schedules the IB
3671  * on the gfx ring for execution by the GPU.
3672  */
3673 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3674 {
3675         struct radeon_ring *ring = &rdev->ring[ib->ring];
3676         u32 header, control = INDIRECT_BUFFER_VALID;
3677
3678         if (ib->is_const_ib) {
3679                 /* set switch buffer packet before const IB */
3680                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3681                 radeon_ring_write(ring, 0);
3682
3683                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3684         } else {
3685                 u32 next_rptr;
3686                 if (ring->rptr_save_reg) {
3687                         next_rptr = ring->wptr + 3 + 4;
3688                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3689                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3690                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3691                         radeon_ring_write(ring, next_rptr);
3692                 } else if (rdev->wb.enabled) {
3693                         next_rptr = ring->wptr + 5 + 4;
3694                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3695                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3696                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3697                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3698                         radeon_ring_write(ring, next_rptr);
3699                 }
3700
3701                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3702         }
3703
3704         control |= ib->length_dw |
3705                 (ib->vm ? (ib->vm->id << 24) : 0);
3706
3707         radeon_ring_write(ring, header);
3708         radeon_ring_write(ring,
3709 #ifdef __BIG_ENDIAN
3710                           (2 << 0) |
3711 #endif
3712                           (ib->gpu_addr & 0xFFFFFFFC));
3713         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3714         radeon_ring_write(ring, control);
3715 }
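/*
 * For reference, the INDIRECT_BUFFER packet emitted above is four dwords.
 * With hypothetical numbers picked purely for illustration (a 16-dword IB
 * in VMID 3 at gpu_addr 0x100000 on a little-endian host), the ring sees:
 *   PACKET3(PACKET3_INDIRECT_BUFFER, 2)
 *   0x00100000                              (IB address bits 31:0)
 *   0x00000000                              (IB address bits 47:32)
 *   INDIRECT_BUFFER_VALID | 16 | (3 << 24)  (length in dwords | vm id)
 */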
3716
3717 /**
3718  * cik_ib_test - basic gfx ring IB test
3719  *
3720  * @rdev: radeon_device pointer
3721  * @ring: radeon_ring structure holding ring information
3722  *
3723  * Allocate an IB and execute it on the gfx ring (CIK).
3724  * Provides a basic gfx ring test to verify that IBs are working.
3725  * Returns 0 on success, error on failure.
3726  */
3727 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3728 {
3729         struct radeon_ib ib;
3730         uint32_t scratch;
3731         uint32_t tmp = 0;
3732         unsigned i;
3733         int r;
3734
3735         r = radeon_scratch_get(rdev, &scratch);
3736         if (r) {
3737                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3738                 return r;
3739         }
3740         WREG32(scratch, 0xCAFEDEAD);
3741         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3742         if (r) {
3743                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3744                 radeon_scratch_free(rdev, scratch);
3745                 return r;
3746         }
3747         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3748         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3749         ib.ptr[2] = 0xDEADBEEF;
3750         ib.length_dw = 3;
3751         r = radeon_ib_schedule(rdev, &ib, NULL);
3752         if (r) {
3753                 radeon_scratch_free(rdev, scratch);
3754                 radeon_ib_free(rdev, &ib);
3755                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3756                 return r;
3757         }
3758         r = radeon_fence_wait(ib.fence, false);
3759         if (r) {
3760                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3761                 radeon_scratch_free(rdev, scratch);
3762                 radeon_ib_free(rdev, &ib);
3763                 return r;
3764         }
3765         for (i = 0; i < rdev->usec_timeout; i++) {
3766                 tmp = RREG32(scratch);
3767                 if (tmp == 0xDEADBEEF)
3768                         break;
3769                 DRM_UDELAY(1);
3770         }
3771         if (i < rdev->usec_timeout) {
3772                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3773         } else {
3774                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3775                           scratch, tmp);
3776                 r = -EINVAL;
3777         }
3778         radeon_scratch_free(rdev, scratch);
3779         radeon_ib_free(rdev, &ib);
3780         return r;
3781 }
3782
3783 /*
3784  * CP.
3785  * On CIK, gfx and compute now have independent command processors.
3786  *
3787  * GFX
3788  * Gfx consists of a single ring and can process both gfx jobs and
3789  * compute jobs.  The gfx CP consists of three microengines (ME):
3790  * PFP - Pre-Fetch Parser
3791  * ME - Micro Engine
3792  * CE - Constant Engine
3793  * The PFP and ME make up what is considered the Drawing Engine (DE).
3794  * The CE is an asynchronous engine used for updating buffer descriptors
3795  * used by the DE so that they can be loaded into cache in parallel
3796  * while the DE is processing state update packets.
3797  *
3798  * Compute
3799  * The compute CP consists of two microengines (ME):
3800  * MEC1 - Compute MicroEngine 1
3801  * MEC2 - Compute MicroEngine 2
3802  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3803  * The queues are exposed to userspace and are programmed directly
3804  * by the compute runtime.
3805  */
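/*
 * Concretely (matching the setup in cik_mec_init() below): Kaveri carries
 * both MECs for 2 * 4 pipes * 8 queues = 64 compute queues, while the other
 * CIK parts use a single MEC for 4 * 8 = 32 queues.
 */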
3806 /**
3807  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3808  *
3809  * @rdev: radeon_device pointer
3810  * @enable: enable or disable the MEs
3811  *
3812  * Halts or unhalts the gfx MEs.
3813  */
3814 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3815 {
3816         if (enable)
3817                 WREG32(CP_ME_CNTL, 0);
3818         else {
3819                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3820                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3821         }
3822         udelay(50);
3823 }
3824
3825 /**
3826  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3827  *
3828  * @rdev: radeon_device pointer
3829  *
3830  * Loads the gfx PFP, ME, and CE ucode.
3831  * Returns 0 for success, -EINVAL if the ucode is not available.
3832  */
3833 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3834 {
3835         const __be32 *fw_data;
3836         int i;
3837
3838         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3839                 return -EINVAL;
3840
3841         cik_cp_gfx_enable(rdev, false);
3842
3843         /* PFP */
3844         fw_data = (const __be32 *)rdev->pfp_fw->data;
3845         WREG32(CP_PFP_UCODE_ADDR, 0);
3846         for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3847                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3848         WREG32(CP_PFP_UCODE_ADDR, 0);
3849
3850         /* CE */
3851         fw_data = (const __be32 *)rdev->ce_fw->data;
3852         WREG32(CP_CE_UCODE_ADDR, 0);
3853         for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3854                 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3855         WREG32(CP_CE_UCODE_ADDR, 0);
3856
3857         /* ME */
3858         fw_data = (const __be32 *)rdev->me_fw->data;
3859         WREG32(CP_ME_RAM_WADDR, 0);
3860         for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3861                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3862         WREG32(CP_ME_RAM_WADDR, 0);
3863
3864         WREG32(CP_PFP_UCODE_ADDR, 0);
3865         WREG32(CP_CE_UCODE_ADDR, 0);
3866         WREG32(CP_ME_RAM_WADDR, 0);
3867         WREG32(CP_ME_RAM_RADDR, 0);
3868         return 0;
3869 }
3870
3871 /**
3872  * cik_cp_gfx_start - start the gfx ring
3873  *
3874  * @rdev: radeon_device pointer
3875  *
3876  * Enables the ring and loads the clear state context and other
3877  * packets required to init the ring.
3878  * Returns 0 for success, error for failure.
3879  */
3880 static int cik_cp_gfx_start(struct radeon_device *rdev)
3881 {
3882         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3883         int r, i;
3884
3885         /* init the CP */
3886         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3887         WREG32(CP_ENDIAN_SWAP, 0);
3888         WREG32(CP_DEVICE_ID, 1);
3889
3890         cik_cp_gfx_enable(rdev, true);
3891
3892         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3893         if (r) {
3894                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3895                 return r;
3896         }
3897
3898         /* init the CE partitions.  CE only used for gfx on CIK */
3899         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3900         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3901         radeon_ring_write(ring, 0xc000);
3902         radeon_ring_write(ring, 0xc000);
3903
3904         /* setup clear context state */
3905         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3906         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3907
3908         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3909         radeon_ring_write(ring, 0x80000000);
3910         radeon_ring_write(ring, 0x80000000);
3911
3912         for (i = 0; i < cik_default_size; i++)
3913                 radeon_ring_write(ring, cik_default_state[i]);
3914
3915         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3916         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3917
3918         /* set clear context state */
3919         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3920         radeon_ring_write(ring, 0);
3921
3922         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3923         radeon_ring_write(ring, 0x00000316);
3924         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3925         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3926
3927         radeon_ring_unlock_commit(rdev, ring);
3928
3929         return 0;
3930 }
3931
3932 /**
3933  * cik_cp_gfx_fini - stop the gfx ring
3934  *
3935  * @rdev: radeon_device pointer
3936  *
3937  * Stop the gfx ring and tear down the driver ring
3938  * info.
3939  */
3940 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3941 {
3942         cik_cp_gfx_enable(rdev, false);
3943         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3944 }
3945
3946 /**
3947  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3948  *
3949  * @rdev: radeon_device pointer
3950  *
3951  * Program the location and size of the gfx ring buffer
3952  * and test it to make sure it's working.
3953  * Returns 0 for success, error for failure.
3954  */
3955 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3956 {
3957         struct radeon_ring *ring;
3958         u32 tmp;
3959         u32 rb_bufsz;
3960         u64 rb_addr;
3961         int r;
3962
3963         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3964         if (rdev->family != CHIP_HAWAII)
3965                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3966
3967         /* Set the write pointer delay */
3968         WREG32(CP_RB_WPTR_DELAY, 0);
3969
3970         /* set the RB to use vmid 0 */
3971         WREG32(CP_RB_VMID, 0);
3972
3973         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3974
3975         /* ring 0 - compute and gfx */
3976         /* Set ring buffer size */
3977         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3978         rb_bufsz = order_base_2(ring->ring_size / 8);
3979         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3980 #ifdef __BIG_ENDIAN
3981         tmp |= BUF_SWAP_32BIT;
3982 #endif
3983         WREG32(CP_RB0_CNTL, tmp);
3984
3985         /* Initialize the ring buffer's read and write pointers */
3986         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3987         ring->wptr = 0;
3988         WREG32(CP_RB0_WPTR, ring->wptr);
3989
3990         /* set the wb address whether it's enabled or not */
3991         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3992         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3993
3994         /* scratch register shadowing is no longer supported */
3995         WREG32(SCRATCH_UMSK, 0);
3996
3997         if (!rdev->wb.enabled)
3998                 tmp |= RB_NO_UPDATE;
3999
4000         mdelay(1);
4001         WREG32(CP_RB0_CNTL, tmp);
4002
4003         rb_addr = ring->gpu_addr >> 8;
4004         WREG32(CP_RB0_BASE, rb_addr);
4005         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4006
4007         ring->rptr = RREG32(CP_RB0_RPTR);
4008
4009         /* start the ring */
4010         cik_cp_gfx_start(rdev);
4011         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4012         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4013         if (r) {
4014                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4015                 return r;
4016         }
4017         return 0;
4018 }
4019
4020 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
4021                               struct radeon_ring *ring)
4022 {
4023         u32 rptr;
4024
4025
4027         if (rdev->wb.enabled) {
4028                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
4029         } else {
4030                 mutex_lock(&rdev->srbm_mutex);
4031                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4032                 rptr = RREG32(CP_HQD_PQ_RPTR);
4033                 cik_srbm_select(rdev, 0, 0, 0, 0);
4034                 mutex_unlock(&rdev->srbm_mutex);
4035         }
4036
4037         return rptr;
4038 }
4039
4040 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
4041                               struct radeon_ring *ring)
4042 {
4043         u32 wptr;
4044
4045         if (rdev->wb.enabled) {
4046                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
4047         } else {
4048                 mutex_lock(&rdev->srbm_mutex);
4049                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4050                 wptr = RREG32(CP_HQD_PQ_WPTR);
4051                 cik_srbm_select(rdev, 0, 0, 0, 0);
4052                 mutex_unlock(&rdev->srbm_mutex);
4053         }
4054
4055         return wptr;
4056 }
4057
4058 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
4059                                struct radeon_ring *ring)
4060 {
4061         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
4062         WDOORBELL32(ring->doorbell_index, ring->wptr);
4063 }
4064
4065 /**
4066  * cik_cp_compute_enable - enable/disable the compute CP MEs
4067  *
4068  * @rdev: radeon_device pointer
4069  * @enable: enable or disable the MEs
4070  *
4071  * Halts or unhalts the compute MEs.
4072  */
4073 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4074 {
4075         if (enable)
4076                 WREG32(CP_MEC_CNTL, 0);
4077         else
4078                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4079         udelay(50);
4080 }
4081
4082 /**
4083  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4084  *
4085  * @rdev: radeon_device pointer
4086  *
4087  * Loads the compute MEC1&2 ucode.
4088  * Returns 0 for success, -EINVAL if the ucode is not available.
4089  */
4090 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4091 {
4092         const __be32 *fw_data;
4093         int i;
4094
4095         if (!rdev->mec_fw)
4096                 return -EINVAL;
4097
4098         cik_cp_compute_enable(rdev, false);
4099
4100         /* MEC1 */
4101         fw_data = (const __be32 *)rdev->mec_fw->data;
4102         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4103         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4104                 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4105         WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4106
4107         if (rdev->family == CHIP_KAVERI) {
4108                 /* MEC2 */
4109                 fw_data = (const __be32 *)rdev->mec_fw->data;
4110                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4111                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4112                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4113                 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4114         }
4115
4116         return 0;
4117 }
4118
4119 /**
4120  * cik_cp_compute_start - start the compute queues
4121  *
4122  * @rdev: radeon_device pointer
4123  *
4124  * Enable the compute queues.
4125  * Returns 0 for success, error for failure.
4126  */
4127 static int cik_cp_compute_start(struct radeon_device *rdev)
4128 {
4129         cik_cp_compute_enable(rdev, true);
4130
4131         return 0;
4132 }
4133
4134 /**
4135  * cik_cp_compute_fini - stop the compute queues
4136  *
4137  * @rdev: radeon_device pointer
4138  *
4139  * Stop the compute queues and tear down the driver queue
4140  * info.
4141  */
4142 static void cik_cp_compute_fini(struct radeon_device *rdev)
4143 {
4144         int i, idx, r;
4145
4146         cik_cp_compute_enable(rdev, false);
4147
4148         for (i = 0; i < 2; i++) {
4149                 if (i == 0)
4150                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4151                 else
4152                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4153
4154                 if (rdev->ring[idx].mqd_obj) {
4155                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4156                         if (unlikely(r != 0))
4157                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4158
4159                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4160                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4161
4162                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4163                         rdev->ring[idx].mqd_obj = NULL;
4164                 }
4165         }
4166 }
4167
4168 static void cik_mec_fini(struct radeon_device *rdev)
4169 {
4170         int r;
4171
4172         if (rdev->mec.hpd_eop_obj) {
4173                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4174                 if (unlikely(r != 0))
4175                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4176                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4177                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4178
4179                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4180                 rdev->mec.hpd_eop_obj = NULL;
4181         }
4182 }
4183
4184 #define MEC_HPD_SIZE 2048
4185
4186 static int cik_mec_init(struct radeon_device *rdev)
4187 {
4188         int r;
4189         u32 *hpd;
4190
4191         /*
4192          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4193          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4194          */
4195         if (rdev->family == CHIP_KAVERI)
4196                 rdev->mec.num_mec = 2;
4197         else
4198                 rdev->mec.num_mec = 1;
4199         rdev->mec.num_pipe = 4;
4200         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4201
4202         if (rdev->mec.hpd_eop_obj == NULL) {
4203                 r = radeon_bo_create(rdev,
4204                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4205                                      PAGE_SIZE, true,
4206                                      RADEON_GEM_DOMAIN_GTT, NULL,
4207                                      &rdev->mec.hpd_eop_obj);
4208                 if (r) {
4209                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4210                         return r;
4211                 }
4212         }
4213
4214         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4215         if (unlikely(r != 0)) {
4216                 cik_mec_fini(rdev);
4217                 return r;
4218         }
4219         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4220                           &rdev->mec.hpd_eop_gpu_addr);
4221         if (r) {
4222                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4223                 cik_mec_fini(rdev);
4224                 return r;
4225         }
4226         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4227         if (r) {
4228                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4229                 cik_mec_fini(rdev);
4230                 return r;
4231         }
4232
4233         /* clear memory.  Not sure if this is required or not */
4234         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4235
4236         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4237         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4238
4239         return 0;
4240 }
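/*
 * Sizing example for the HPD EOP buffer allocated above (informational):
 * with MEC_HPD_SIZE = 2048, Kaveri needs 2 MECs * 4 pipes * 2048 * 2 =
 * 32 KiB of GTT, and the single-MEC parts need 4 * 2048 * 2 = 16 KiB.
 */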
4241
4242 struct hqd_registers
4243 {
4244         u32 cp_mqd_base_addr;
4245         u32 cp_mqd_base_addr_hi;
4246         u32 cp_hqd_active;
4247         u32 cp_hqd_vmid;
4248         u32 cp_hqd_persistent_state;
4249         u32 cp_hqd_pipe_priority;
4250         u32 cp_hqd_queue_priority;
4251         u32 cp_hqd_quantum;
4252         u32 cp_hqd_pq_base;
4253         u32 cp_hqd_pq_base_hi;
4254         u32 cp_hqd_pq_rptr;
4255         u32 cp_hqd_pq_rptr_report_addr;
4256         u32 cp_hqd_pq_rptr_report_addr_hi;
4257         u32 cp_hqd_pq_wptr_poll_addr;
4258         u32 cp_hqd_pq_wptr_poll_addr_hi;
4259         u32 cp_hqd_pq_doorbell_control;
4260         u32 cp_hqd_pq_wptr;
4261         u32 cp_hqd_pq_control;
4262         u32 cp_hqd_ib_base_addr;
4263         u32 cp_hqd_ib_base_addr_hi;
4264         u32 cp_hqd_ib_rptr;
4265         u32 cp_hqd_ib_control;
4266         u32 cp_hqd_iq_timer;
4267         u32 cp_hqd_iq_rptr;
4268         u32 cp_hqd_dequeue_request;
4269         u32 cp_hqd_dma_offload;
4270         u32 cp_hqd_sema_cmd;
4271         u32 cp_hqd_msg_type;
4272         u32 cp_hqd_atomic0_preop_lo;
4273         u32 cp_hqd_atomic0_preop_hi;
4274         u32 cp_hqd_atomic1_preop_lo;
4275         u32 cp_hqd_atomic1_preop_hi;
4276         u32 cp_hqd_hq_scheduler0;
4277         u32 cp_hqd_hq_scheduler1;
4278         u32 cp_mqd_control;
4279 };
4280
4281 struct bonaire_mqd
4282 {
4283         u32 header;
4284         u32 dispatch_initiator;
4285         u32 dimensions[3];
4286         u32 start_idx[3];
4287         u32 num_threads[3];
4288         u32 pipeline_stat_enable;
4289         u32 perf_counter_enable;
4290         u32 pgm[2];
4291         u32 tba[2];
4292         u32 tma[2];
4293         u32 pgm_rsrc[2];
4294         u32 vmid;
4295         u32 resource_limits;
4296         u32 static_thread_mgmt01[2];
4297         u32 tmp_ring_size;
4298         u32 static_thread_mgmt23[2];
4299         u32 restart[3];
4300         u32 thread_trace_enable;
4301         u32 reserved1;
4302         u32 user_data[16];
4303         u32 vgtcs_invoke_count[2];
4304         struct hqd_registers queue_state;
4305         u32 dequeue_cntr;
4306         u32 interrupt_queue[64];
4307 };
4308
4309 /**
4310  * cik_cp_compute_resume - setup the compute queue registers
4311  *
4312  * @rdev: radeon_device pointer
4313  *
4314  * Program the compute queues and test them to make sure they
4315  * are working.
4316  * Returns 0 for success, error for failure.
4317  */
4318 static int cik_cp_compute_resume(struct radeon_device *rdev)
4319 {
4320         int r, i, idx;
4321         u32 tmp;
4322         bool use_doorbell = true;
4323         u64 hqd_gpu_addr;
4324         u64 mqd_gpu_addr;
4325         u64 eop_gpu_addr;
4326         u64 wb_gpu_addr;
4327         u32 *buf;
4328         struct bonaire_mqd *mqd;
4329
4330         r = cik_cp_compute_start(rdev);
4331         if (r)
4332                 return r;
4333
4334         /* fix up chicken bits */
4335         tmp = RREG32(CP_CPF_DEBUG);
4336         tmp |= (1 << 23);
4337         WREG32(CP_CPF_DEBUG, tmp);
4338
4339         /* init the pipes */
4340         mutex_lock(&rdev->srbm_mutex);
4341         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4342                 int me = (i < 4) ? 1 : 2;
4343                 int pipe = (i < 4) ? i : (i - 4);
4344
4345                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4346
4347                 cik_srbm_select(rdev, me, pipe, 0, 0);
4348
4349                 /* write the EOP addr */
4350                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4351                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4352
4353                 /* set the VMID assigned */
4354                 WREG32(CP_HPD_EOP_VMID, 0);
4355
4356                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4357                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4358                 tmp &= ~EOP_SIZE_MASK;
4359                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4360                 WREG32(CP_HPD_EOP_CONTROL, tmp);
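                /*
                 * Worked value for the EOP size programmed just above
                 * (informational): MEC_HPD_SIZE is 2048 bytes = 512 dwords,
                 * so MEC_HPD_SIZE / 8 = 256 and order_base_2(256) = 8; the
                 * hardware reads this as 2^(8+1) = 512 dwords, i.e. the
                 * full 2 KiB EOP buffer.
                 */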
4361         }
4362         cik_srbm_select(rdev, 0, 0, 0, 0);
4363         mutex_unlock(&rdev->srbm_mutex);
4364
4365         /* init the queues.  Just two for now. */
4366         for (i = 0; i < 2; i++) {
4367                 if (i == 0)
4368                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4369                 else
4370                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4371
4372                 if (rdev->ring[idx].mqd_obj == NULL) {
4373                         r = radeon_bo_create(rdev,
4374                                              sizeof(struct bonaire_mqd),
4375                                              PAGE_SIZE, true,
4376                                              RADEON_GEM_DOMAIN_GTT, NULL,
4377                                              &rdev->ring[idx].mqd_obj);
4378                         if (r) {
4379                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4380                                 return r;
4381                         }
4382                 }
4383
4384                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4385                 if (unlikely(r != 0)) {
4386                         cik_cp_compute_fini(rdev);
4387                         return r;
4388                 }
4389                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4390                                   &mqd_gpu_addr);
4391                 if (r) {
4392                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4393                         cik_cp_compute_fini(rdev);
4394                         return r;
4395                 }
4396                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4397                 if (r) {
4398                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4399                         cik_cp_compute_fini(rdev);
4400                         return r;
4401                 }
4402
4403                 /* init the mqd struct */
4404                 memset(buf, 0, sizeof(struct bonaire_mqd));
4405
4406                 mqd = (struct bonaire_mqd *)buf;
4407                 mqd->header = 0xC0310800;
4408                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4409                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4410                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4411                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4412
4413                 mutex_lock(&rdev->srbm_mutex);
4414                 cik_srbm_select(rdev, rdev->ring[idx].me,
4415                                 rdev->ring[idx].pipe,
4416                                 rdev->ring[idx].queue, 0);
4417
4418                 /* disable wptr polling */
4419                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4420                 tmp &= ~WPTR_POLL_EN;
4421                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4422
4423                 /* enable doorbell? */
4424                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4425                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4426                 if (use_doorbell)
4427                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4428                 else
4429                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4430                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4431                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4432
4433                 /* disable the queue if it's active */
4434                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4435                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4436                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4437                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4438                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4439                         for (i = 0; i < rdev->usec_timeout; i++) {
4440                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4441                                         break;
4442                                 udelay(1);
4443                         }
4444                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4445                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4446                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4447                 }
4448
4449                 /* set the pointer to the MQD */
4450                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4451                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4452                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4453                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4454                 /* set MQD vmid to 0 */
4455                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4456                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4457                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4458
4459                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4460                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4461                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4462                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4463                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4464                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4465
4466                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4467                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4468                 mqd->queue_state.cp_hqd_pq_control &=
4469                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4470
4471                 mqd->queue_state.cp_hqd_pq_control |=
4472                         order_base_2(rdev->ring[idx].ring_size / 8);
4473                 mqd->queue_state.cp_hqd_pq_control |=
4474                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4475 #ifdef __BIG_ENDIAN
4476                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4477 #endif
4478                 mqd->queue_state.cp_hqd_pq_control &=
4479                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4480                 mqd->queue_state.cp_hqd_pq_control |=
4481                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4482                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4483
4484                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4485                 if (i == 0)
4486                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4487                 else
4488                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4489                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4490                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4491                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4492                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4493                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4494
4495                 /* set the wb address whether it's enabled or not */
4496                 if (i == 0)
4497                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4498                 else
4499                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4500                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4501                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4502                         upper_32_bits(wb_gpu_addr) & 0xffff;
4503                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4504                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4505                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4506                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4507
4508                 /* enable the doorbell if requested */
4509                 if (use_doorbell) {
4510                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4511                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4512                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4513                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4514                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4515                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4516                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4517                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4518
4519                 } else {
4520                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4521                 }
4522                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4523                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4524
4525                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4526                 rdev->ring[idx].wptr = 0;
4527                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4528                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4529                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4530                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4531
4532                 /* set the vmid for the queue */
4533                 mqd->queue_state.cp_hqd_vmid = 0;
4534                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4535
4536                 /* activate the queue */
4537                 mqd->queue_state.cp_hqd_active = 1;
4538                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4539
4540                 cik_srbm_select(rdev, 0, 0, 0, 0);
4541                 mutex_unlock(&rdev->srbm_mutex);
4542
4543                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4544                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4545
4546                 rdev->ring[idx].ready = true;
4547                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4548                 if (r)
4549                         rdev->ring[idx].ready = false;
4550         }
4551
4552         return 0;
4553 }
4554
4555 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4556 {
4557         cik_cp_gfx_enable(rdev, enable);
4558         cik_cp_compute_enable(rdev, enable);
4559 }
4560
4561 static int cik_cp_load_microcode(struct radeon_device *rdev)
4562 {
4563         int r;
4564
4565         r = cik_cp_gfx_load_microcode(rdev);
4566         if (r)
4567                 return r;
4568         r = cik_cp_compute_load_microcode(rdev);
4569         if (r)
4570                 return r;
4571
4572         return 0;
4573 }
4574
4575 static void cik_cp_fini(struct radeon_device *rdev)
4576 {
4577         cik_cp_gfx_fini(rdev);
4578         cik_cp_compute_fini(rdev);
4579 }
4580
4581 static int cik_cp_resume(struct radeon_device *rdev)
4582 {
4583         int r;
4584
4585         cik_enable_gui_idle_interrupt(rdev, false);
4586
4587         r = cik_cp_load_microcode(rdev);
4588         if (r)
4589                 return r;
4590
4591         r = cik_cp_gfx_resume(rdev);
4592         if (r)
4593                 return r;
4594         r = cik_cp_compute_resume(rdev);
4595         if (r)
4596                 return r;
4597
4598         cik_enable_gui_idle_interrupt(rdev, true);
4599
4600         return 0;
4601 }
4602
4603 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4604 {
4605         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4606                 RREG32(GRBM_STATUS));
4607         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4608                 RREG32(GRBM_STATUS2));
4609         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4610                 RREG32(GRBM_STATUS_SE0));
4611         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4612                 RREG32(GRBM_STATUS_SE1));
4613         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4614                 RREG32(GRBM_STATUS_SE2));
4615         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4616                 RREG32(GRBM_STATUS_SE3));
4617         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4618                 RREG32(SRBM_STATUS));
4619         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4620                 RREG32(SRBM_STATUS2));
4621         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4622                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4623         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4624                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4625         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4626         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4627                  RREG32(CP_STALLED_STAT1));
4628         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4629                  RREG32(CP_STALLED_STAT2));
4630         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4631                  RREG32(CP_STALLED_STAT3));
4632         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4633                  RREG32(CP_CPF_BUSY_STAT));
4634         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4635                  RREG32(CP_CPF_STALLED_STAT1));
4636         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4637         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4638         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4639                  RREG32(CP_CPC_STALLED_STAT1));
4640         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4641 }
4642
4643 /**
4644  * cik_gpu_check_soft_reset - check which blocks are busy
4645  *
4646  * @rdev: radeon_device pointer
4647  *
4648  * Check which blocks are busy and return the relevant reset
4649  * mask to be used by cik_gpu_soft_reset().
4650  * Returns a mask of the blocks to be reset.
4651  */
4652 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4653 {
4654         u32 reset_mask = 0;
4655         u32 tmp;
4656
4657         /* GRBM_STATUS */
4658         tmp = RREG32(GRBM_STATUS);
4659         if (tmp & (PA_BUSY | SC_BUSY |
4660                    BCI_BUSY | SX_BUSY |
4661                    TA_BUSY | VGT_BUSY |
4662                    DB_BUSY | CB_BUSY |
4663                    GDS_BUSY | SPI_BUSY |
4664                    IA_BUSY | IA_BUSY_NO_DMA))
4665                 reset_mask |= RADEON_RESET_GFX;
4666
4667         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4668                 reset_mask |= RADEON_RESET_CP;
4669
4670         /* GRBM_STATUS2 */
4671         tmp = RREG32(GRBM_STATUS2);
4672         if (tmp & RLC_BUSY)
4673                 reset_mask |= RADEON_RESET_RLC;
4674
4675         /* SDMA0_STATUS_REG */
4676         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4677         if (!(tmp & SDMA_IDLE))
4678                 reset_mask |= RADEON_RESET_DMA;
4679
4680         /* SDMA1_STATUS_REG */
4681         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4682         if (!(tmp & SDMA_IDLE))
4683                 reset_mask |= RADEON_RESET_DMA1;
4684
4685         /* SRBM_STATUS2 */
4686         tmp = RREG32(SRBM_STATUS2);
4687         if (tmp & SDMA_BUSY)
4688                 reset_mask |= RADEON_RESET_DMA;
4689
4690         if (tmp & SDMA1_BUSY)
4691                 reset_mask |= RADEON_RESET_DMA1;
4692
4693         /* SRBM_STATUS */
4694         tmp = RREG32(SRBM_STATUS);
4695
4696         if (tmp & IH_BUSY)
4697                 reset_mask |= RADEON_RESET_IH;
4698
4699         if (tmp & SEM_BUSY)
4700                 reset_mask |= RADEON_RESET_SEM;
4701
4702         if (tmp & GRBM_RQ_PENDING)
4703                 reset_mask |= RADEON_RESET_GRBM;
4704
4705         if (tmp & VMC_BUSY)
4706                 reset_mask |= RADEON_RESET_VMC;
4707
4708         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4709                    MCC_BUSY | MCD_BUSY))
4710                 reset_mask |= RADEON_RESET_MC;
4711
4712         if (evergreen_is_display_hung(rdev))
4713                 reset_mask |= RADEON_RESET_DISPLAY;
4714
4715         /* Skip MC reset as it's most likely not hung, just busy */
4716         if (reset_mask & RADEON_RESET_MC) {
4717                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4718                 reset_mask &= ~RADEON_RESET_MC;
4719         }
4720
4721         return reset_mask;
4722 }
4723
4724 /**
4725  * cik_gpu_soft_reset - soft reset GPU
4726  *
4727  * @rdev: radeon_device pointer
4728  * @reset_mask: mask of which blocks to reset
4729  *
4730  * Soft reset the blocks specified in @reset_mask.
4731  */
4732 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4733 {
4734         struct evergreen_mc_save save;
4735         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4736         u32 tmp;
4737
4738         if (reset_mask == 0)
4739                 return;
4740
4741         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4742
4743         cik_print_gpu_status_regs(rdev);
4744         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4745                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4746         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4747                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4748
4749         /* disable CG/PG */
4750         cik_fini_pg(rdev);
4751         cik_fini_cg(rdev);
4752
4753         /* stop the rlc */
4754         cik_rlc_stop(rdev);
4755
4756         /* Disable GFX parsing/prefetching */
4757         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4758
4759         /* Disable MEC parsing/prefetching */
4760         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4761
4762         if (reset_mask & RADEON_RESET_DMA) {
4763                 /* sdma0 */
4764                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4765                 tmp |= SDMA_HALT;
4766                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4767         }
4768         if (reset_mask & RADEON_RESET_DMA1) {
4769                 /* sdma1 */
4770                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4771                 tmp |= SDMA_HALT;
4772                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4773         }
4774
4775         evergreen_mc_stop(rdev, &save);
4776         if (evergreen_mc_wait_for_idle(rdev)) {
4777                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4778         }
4779
4780         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4781                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4782
4783         if (reset_mask & RADEON_RESET_CP) {
4784                 grbm_soft_reset |= SOFT_RESET_CP;
4785
4786                 srbm_soft_reset |= SOFT_RESET_GRBM;
4787         }
4788
4789         if (reset_mask & RADEON_RESET_DMA)
4790                 srbm_soft_reset |= SOFT_RESET_SDMA;
4791
4792         if (reset_mask & RADEON_RESET_DMA1)
4793                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4794
4795         if (reset_mask & RADEON_RESET_DISPLAY)
4796                 srbm_soft_reset |= SOFT_RESET_DC;
4797
4798         if (reset_mask & RADEON_RESET_RLC)
4799                 grbm_soft_reset |= SOFT_RESET_RLC;
4800
4801         if (reset_mask & RADEON_RESET_SEM)
4802                 srbm_soft_reset |= SOFT_RESET_SEM;
4803
4804         if (reset_mask & RADEON_RESET_IH)
4805                 srbm_soft_reset |= SOFT_RESET_IH;
4806
4807         if (reset_mask & RADEON_RESET_GRBM)
4808                 srbm_soft_reset |= SOFT_RESET_GRBM;
4809
4810         if (reset_mask & RADEON_RESET_VMC)
4811                 srbm_soft_reset |= SOFT_RESET_VMC;
4812
4813         if (!(rdev->flags & RADEON_IS_IGP)) {
4814                 if (reset_mask & RADEON_RESET_MC)
4815                         srbm_soft_reset |= SOFT_RESET_MC;
4816         }
4817
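        /* assert the requested reset bits, read back to post the write,
         * let the reset settle, then deassert and read back again */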
4818         if (grbm_soft_reset) {
4819                 tmp = RREG32(GRBM_SOFT_RESET);
4820                 tmp |= grbm_soft_reset;
4821                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4822                 WREG32(GRBM_SOFT_RESET, tmp);
4823                 tmp = RREG32(GRBM_SOFT_RESET);
4824
4825                 udelay(50);
4826
4827                 tmp &= ~grbm_soft_reset;
4828                 WREG32(GRBM_SOFT_RESET, tmp);
4829                 tmp = RREG32(GRBM_SOFT_RESET);
4830         }
4831
4832         if (srbm_soft_reset) {
4833                 tmp = RREG32(SRBM_SOFT_RESET);
4834                 tmp |= srbm_soft_reset;
4835                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4836                 WREG32(SRBM_SOFT_RESET, tmp);
4837                 tmp = RREG32(SRBM_SOFT_RESET);
4838
4839                 udelay(50);
4840
4841                 tmp &= ~srbm_soft_reset;
4842                 WREG32(SRBM_SOFT_RESET, tmp);
4843                 tmp = RREG32(SRBM_SOFT_RESET);
4844         }
4845
4846         /* Wait a little for things to settle down */
4847         udelay(50);
4848
4849         evergreen_mc_resume(rdev, &save);
4850         udelay(50);
4851
4852         cik_print_gpu_status_regs(rdev);
4853 }
4854
4855 /**
4856  * cik_asic_reset - soft reset GPU
4857  *
4858  * @rdev: radeon_device pointer
4859  *
4860  * Look up which blocks are hung and attempt
4861  * to reset them.
4862  * Returns 0 for success.
4863  */
4864 int cik_asic_reset(struct radeon_device *rdev)
4865 {
4866         u32 reset_mask;
4867
4868         reset_mask = cik_gpu_check_soft_reset(rdev);
4869
4870         if (reset_mask)
4871                 r600_set_bios_scratch_engine_hung(rdev, true);
4872
4873         cik_gpu_soft_reset(rdev, reset_mask);
4874
4875         reset_mask = cik_gpu_check_soft_reset(rdev);
4876
4877         if (!reset_mask)
4878                 r600_set_bios_scratch_engine_hung(rdev, false);
4879
4880         return 0;
4881 }
4882
4883 /**
4884  * cik_gfx_is_lockup - check if the 3D engine is locked up
4885  *
4886  * @rdev: radeon_device pointer
4887  * @ring: radeon_ring structure holding ring information
4888  *
4889  * Check if the 3D engine is locked up (CIK).
4890  * Returns true if the engine is locked, false if not.
4891  */
4892 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4893 {
4894         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4895
4896         if (!(reset_mask & (RADEON_RESET_GFX |
4897                             RADEON_RESET_COMPUTE |
4898                             RADEON_RESET_CP))) {
4899                 radeon_ring_lockup_update(ring);
4900                 return false;
4901         }
4902         /* force CP activities */
4903         radeon_ring_force_activity(rdev, ring);
4904         return radeon_ring_test_lockup(rdev, ring);
4905 }
4906
4907 /* MC */
4908 /**
4909  * cik_mc_program - program the GPU memory controller
4910  *
4911  * @rdev: radeon_device pointer
4912  *
4913  * Set the location of vram, gart, and AGP in the GPU's
4914  * physical address space (CIK).
4915  */
4916 static void cik_mc_program(struct radeon_device *rdev)
4917 {
4918         struct evergreen_mc_save save;
4919         u32 tmp;
4920         int i, j;
4921
4922         /* Initialize HDP */
4923         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4924                 WREG32((0x2c14 + j), 0x00000000);
4925                 WREG32((0x2c18 + j), 0x00000000);
4926                 WREG32((0x2c1c + j), 0x00000000);
4927                 WREG32((0x2c20 + j), 0x00000000);
4928                 WREG32((0x2c24 + j), 0x00000000);
4929         }
4930         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4931
4932         evergreen_mc_stop(rdev, &save);
4933         if (radeon_mc_wait_for_idle(rdev)) {
4934                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4935         }
4936         /* Lockout access through VGA aperture*/
4937         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4938         /* Update configuration */
4939         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4940                rdev->mc.vram_start >> 12);
4941         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4942                rdev->mc.vram_end >> 12);
4943         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4944                rdev->vram_scratch.gpu_addr >> 12);
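        /* MC_VM_FB_LOCATION packs the VRAM top and base in 16MB units:
         * top in the upper 16 bits, base in the lower 16 bits */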
4945         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4946         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4947         WREG32(MC_VM_FB_LOCATION, tmp);
4948         /* XXX double check these! */
4949         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4950         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4951         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4952         WREG32(MC_VM_AGP_BASE, 0);
4953         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4954         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4955         if (radeon_mc_wait_for_idle(rdev)) {
4956                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4957         }
4958         evergreen_mc_resume(rdev, &save);
4959         /* we need to own VRAM, so turn off the VGA renderer here
4960          * to stop it overwriting our objects */
4961         rv515_vga_render_disable(rdev);
4962 }
4963
4964 /**
4965  * cik_mc_init - initialize the memory controller driver params
4966  *
4967  * @rdev: radeon_device pointer
4968  *
4969  * Look up the amount of vram, vram width, and decide how to place
4970  * vram and gart within the GPU's physical address space (CIK).
4971  * Returns 0 for success.
4972  */
4973 static int cik_mc_init(struct radeon_device *rdev)
4974 {
4975         u32 tmp;
4976         int chansize, numchan;
4977
4978         /* Get VRAM information */
4979         rdev->mc.vram_is_ddr = true;
4980         tmp = RREG32(MC_ARB_RAMCFG);
4981         if (tmp & CHANSIZE_MASK) {
4982                 chansize = 64;
4983         } else {
4984                 chansize = 32;
4985         }
4986         tmp = RREG32(MC_SHARED_CHMAP);
4987         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4988         case 0:
4989         default:
4990                 numchan = 1;
4991                 break;
4992         case 1:
4993                 numchan = 2;
4994                 break;
4995         case 2:
4996                 numchan = 4;
4997                 break;
4998         case 3:
4999                 numchan = 8;
5000                 break;
5001         case 4:
5002                 numchan = 3;
5003                 break;
5004         case 5:
5005                 numchan = 6;
5006                 break;
5007         case 6:
5008                 numchan = 10;
5009                 break;
5010         case 7:
5011                 numchan = 12;
5012                 break;
5013         case 8:
5014                 numchan = 16;
5015                 break;
5016         }
5017         rdev->mc.vram_width = numchan * chansize;
5018         /* Could the aperture size report 0? */
5019         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5020         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5021         /* size in MB on CIK */
5022         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5023         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5024         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5025         si_vram_gtt_location(rdev, &rdev->mc);
5026         radeon_update_bandwidth_info(rdev);
5027
5028         return 0;
5029 }
5030
5031 /*
5032  * GART
5033  * VMID 0 maps the physical GPU addresses used by the kernel.
5034  * VMIDs 1-15 are used for userspace clients and are handled
5035  * by the radeon vm/hsa code.
5036  */
5037 /**
5038  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5039  *
5040  * @rdev: radeon_device pointer
5041  *
5042  * Flush the TLB for the VMID 0 page table (CIK).
5043  */
5044 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5045 {
5046         /* flush hdp cache */
5047         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5048
5049         /* bits 0-15 are the VM contexts0-15 */
5050         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5051 }
5052
5053 /**
5054  * cik_pcie_gart_enable - gart enable
5055  *
5056  * @rdev: radeon_device pointer
5057  *
5058  * This sets up the TLBs, programs the page tables for VMID0,
5059  * sets up the hw for VMIDs 1-15 which are allocated on
5060  * demand, and sets up the global locations for the LDS, GDS,
5061  * and GPUVM for FSA64 clients (CIK).
5062  * Returns 0 for success, errors for failure.
5063  */
5064 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5065 {
5066         int r, i;
5067
5068         if (rdev->gart.robj == NULL) {
5069                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5070                 return -EINVAL;
5071         }
5072         r = radeon_gart_table_vram_pin(rdev);
5073         if (r)
5074                 return r;
5075         radeon_gart_restore(rdev);
5076         /* Setup TLB control */
5077         WREG32(MC_VM_MX_L1_TLB_CNTL,
5078                (0xA << 7) |
5079                ENABLE_L1_TLB |
5080                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5081                ENABLE_ADVANCED_DRIVER_MODEL |
5082                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5083         /* Setup L2 cache */
5084         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5085                ENABLE_L2_FRAGMENT_PROCESSING |
5086                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5087                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5088                EFFECTIVE_L2_QUEUE_SIZE(7) |
5089                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5090         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5091         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5092                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5093         /* setup context0 */
5094         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5095         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5096         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5097         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5098                         (u32)(rdev->dummy_page.addr >> 12));
5099         WREG32(VM_CONTEXT0_CNTL2, 0);
5100         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5101                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5102
5103         WREG32(0x15D4, 0);
5104         WREG32(0x15D8, 0);
5105         WREG32(0x15DC, 0);
5106
5107         /* empty context1-15 */
5108         /* FIXME start with 4G, once using 2 level pt switch to full
5109          * vm size space
5110          */
5111         /* set vm size, must be a multiple of 4 */
5112         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5113         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
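        /* contexts 1-7 and 8-15 sit in two separate register banks, hence
         * the split below; all of them start out pointing at the VMID0
         * GART table until a VM is actually bound */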
5114         for (i = 1; i < 16; i++) {
5115                 if (i < 8)
5116                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5117                                rdev->gart.table_addr >> 12);
5118                 else
5119                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5120                                rdev->gart.table_addr >> 12);
5121         }
5122
5123         /* enable context1-15 */
5124         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5125                (u32)(rdev->dummy_page.addr >> 12));
5126         WREG32(VM_CONTEXT1_CNTL2, 4);
5127         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5128                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5129                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5130                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5131                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5132                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5133                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5134                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5135                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5136                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5137                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5138                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5139                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5140
5141         /* TC cache setup ??? */
5142         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5143         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5144         WREG32(TC_CFG_L1_STORE_POLICY, 0);
5145
5146         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5147         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5148         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5149         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5150         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5151
5152         WREG32(TC_CFG_L1_VOLATILE, 0);
5153         WREG32(TC_CFG_L2_VOLATILE, 0);
5154
5155         if (rdev->family == CHIP_KAVERI) {
5156                 u32 tmp = RREG32(CHUB_CONTROL);
5157                 tmp &= ~BYPASS_VM;
5158                 WREG32(CHUB_CONTROL, tmp);
5159         }
5160
5161         /* XXX SH_MEM regs */
5162         /* where to put LDS, scratch, GPUVM in FSA64 space */
5163         mutex_lock(&rdev->srbm_mutex);
5164         for (i = 0; i < 16; i++) {
5165                 cik_srbm_select(rdev, 0, 0, 0, i);
5166                 /* CP and shaders */
5167                 WREG32(SH_MEM_CONFIG, 0);
5168                 WREG32(SH_MEM_APE1_BASE, 1);
5169                 WREG32(SH_MEM_APE1_LIMIT, 0);
5170                 WREG32(SH_MEM_BASES, 0);
5171                 /* SDMA GFX */
5172                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5173                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5174                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5175                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5176                 /* XXX SDMA RLC - todo */
5177         }
5178         cik_srbm_select(rdev, 0, 0, 0, 0);
5179         mutex_unlock(&rdev->srbm_mutex);
5180
5181         cik_pcie_gart_tlb_flush(rdev);
5182         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5183                  (unsigned)(rdev->mc.gtt_size >> 20),
5184                  (unsigned long long)rdev->gart.table_addr);
5185         rdev->gart.ready = true;
5186         return 0;
5187 }
5188
5189 /**
5190  * cik_pcie_gart_disable - gart disable
5191  *
5192  * @rdev: radeon_device pointer
5193  *
5194  * This disables all VM page tables (CIK).
5195  */
5196 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5197 {
5198         /* Disable all tables */
5199         WREG32(VM_CONTEXT0_CNTL, 0);
5200         WREG32(VM_CONTEXT1_CNTL, 0);
5201         /* Setup TLB control */
5202         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5203                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5204         /* Setup L2 cache */
5205         WREG32(VM_L2_CNTL,
5206                ENABLE_L2_FRAGMENT_PROCESSING |
5207                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5208                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5209                EFFECTIVE_L2_QUEUE_SIZE(7) |
5210                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5211         WREG32(VM_L2_CNTL2, 0);
5212         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5213                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5214         radeon_gart_table_vram_unpin(rdev);
5215 }
5216
5217 /**
5218  * cik_pcie_gart_fini - vm fini callback
5219  *
5220  * @rdev: radeon_device pointer
5221  *
5222  * Tears down the driver GART/VM setup (CIK).
5223  */
5224 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5225 {
5226         cik_pcie_gart_disable(rdev);
5227         radeon_gart_table_vram_free(rdev);
5228         radeon_gart_fini(rdev);
5229 }
5230
5231 /* vm parser */
5232 /**
5233  * cik_ib_parse - vm ib_parse callback
5234  *
5235  * @rdev: radeon_device pointer
5236  * @ib: indirect buffer pointer
5237  *
5238  * CIK uses hw IB checking so this is a nop (CIK).
5239  */
5240 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5241 {
5242         return 0;
5243 }
5244
5245 /*
5246  * vm
5247  * VMID 0 maps the physical GPU addresses used by the kernel.
5248  * VMIDs 1-15 are used for userspace clients and are handled
5249  * by the radeon vm/hsa code.
5250  */
5251 /**
5252  * cik_vm_init - cik vm init callback
5253  *
5254  * @rdev: radeon_device pointer
5255  *
5256  * Inits cik specific vm parameters (number of VMs, base of vram for
5257  * VMIDs 1-15) (CIK).
5258  * Returns 0 for success.
5259  */
5260 int cik_vm_init(struct radeon_device *rdev)
5261 {
5262         /* number of VMs */
5263         rdev->vm_manager.nvm = 16;
5264         /* base offset of vram pages */
5265         if (rdev->flags & RADEON_IS_IGP) {
5266                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5267                 tmp <<= 22;
5268                 rdev->vm_manager.vram_base_offset = tmp;
5269         } else
5270                 rdev->vm_manager.vram_base_offset = 0;
5271
5272         return 0;
5273 }
5274
5275 /**
5276  * cik_vm_fini - cik vm fini callback
5277  *
5278  * @rdev: radeon_device pointer
5279  *
5280  * Tear down any asic specific VM setup (CIK).
5281  */
5282 void cik_vm_fini(struct radeon_device *rdev)
5283 {
5284 }
5285
5286 /**
5287  * cik_vm_decode_fault - print human readable fault info
5288  *
5289  * @rdev: radeon_device pointer
5290  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5291  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5292  *
5293  * Print human readable fault information (CIK).
5294  */
5295 static void cik_vm_decode_fault(struct radeon_device *rdev,
5296                                 u32 status, u32 addr, u32 mc_client)
5297 {
5298         u32 mc_id;
5299         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5300         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5301         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5302                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5303
5304         if (rdev->family == CHIP_HAWAII)
5305                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5306         else
5307                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5308
5309         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5310                protections, vmid, addr,
5311                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5312                block, mc_client, mc_id);
5313 }
5314
5315 /**
5316  * cik_vm_flush - cik vm flush using the CP
5317  *
5318  * @rdev: radeon_device pointer
5319  *
5320  * Update the page table base and flush the VM TLB
5321  * using the CP (CIK).
5322  */
5323 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5324 {
5325         struct radeon_ring *ring = &rdev->ring[ridx];
5326
5327         if (vm == NULL)
5328                 return;
5329
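        /* each WRITE_DATA packet below is: header, engine/dst select,
         * register dword offset, upper address (0 for registers), data */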
5330         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5331         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5332                                  WRITE_DATA_DST_SEL(0)));
5333         if (vm->id < 8) {
5334                 radeon_ring_write(ring,
5335                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5336         } else {
5337                 radeon_ring_write(ring,
5338                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5339         }
5340         radeon_ring_write(ring, 0);
5341         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5342
5343         /* update SH_MEM_* regs */
5344         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5345         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5346                                  WRITE_DATA_DST_SEL(0)));
5347         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5348         radeon_ring_write(ring, 0);
5349         radeon_ring_write(ring, VMID(vm->id));
5350
5351         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5352         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5353                                  WRITE_DATA_DST_SEL(0)));
5354         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5355         radeon_ring_write(ring, 0);
5356
5357         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5358         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5359         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5360         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5361
5362         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5363         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5364                                  WRITE_DATA_DST_SEL(0)));
5365         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5366         radeon_ring_write(ring, 0);
5367         radeon_ring_write(ring, VMID(0));
5368
5369         /* HDP flush */
5370         /* We should be using the WAIT_REG_MEM packet here like in
5371          * cik_fence_ring_emit(), but it causes the CP to hang in this
5372          * context...
5373          */
5374         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5375         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5376                                  WRITE_DATA_DST_SEL(0)));
5377         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5378         radeon_ring_write(ring, 0);
5379         radeon_ring_write(ring, 0);
5380
5381         /* bits 0-15 are the VM contexts0-15 */
5382         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384                                  WRITE_DATA_DST_SEL(0)));
5385         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5386         radeon_ring_write(ring, 0);
5387         radeon_ring_write(ring, 1 << vm->id);
5388
5389         /* compute doesn't have PFP */
5390         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5391                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5392                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5393                 radeon_ring_write(ring, 0x0);
5394         }
5395 }
5396
5397 /*
5398  * RLC
5399  * The RLC is a multi-purpose microengine that handles a
5400  * variety of functions, the most important of which is
5401  * the interrupt controller.
5402  */
5403 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5404                                           bool enable)
5405 {
5406         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5407
5408         if (enable)
5409                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5410         else
5411                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5412         WREG32(CP_INT_CNTL_RING0, tmp);
5413 }
5414
5415 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5416 {
5417         u32 tmp;
5418
5419         tmp = RREG32(RLC_LB_CNTL);
5420         if (enable)
5421                 tmp |= LOAD_BALANCE_ENABLE;
5422         else
5423                 tmp &= ~LOAD_BALANCE_ENABLE;
5424         WREG32(RLC_LB_CNTL, tmp);
5425 }
5426
5427 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5428 {
5429         u32 i, j, k;
5430         u32 mask;
5431
5432         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5433                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5434                         cik_select_se_sh(rdev, i, j);
5435                         for (k = 0; k < rdev->usec_timeout; k++) {
5436                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5437                                         break;
5438                                 udelay(1);
5439                         }
5440                 }
5441         }
5442         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5443
5444         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5445         for (k = 0; k < rdev->usec_timeout; k++) {
5446                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5447                         break;
5448                 udelay(1);
5449         }
5450 }
5451
5452 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5453 {
5454         u32 tmp;
5455
5456         tmp = RREG32(RLC_CNTL);
5457         if (tmp != rlc)
5458                 WREG32(RLC_CNTL, rlc);
5459 }
5460
5461 static u32 cik_halt_rlc(struct radeon_device *rdev)
5462 {
5463         u32 data, orig;
5464
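        /* disable the RLC, wait for it to go idle, and return the previous
         * RLC_CNTL value so the caller can restore it via cik_update_rlc() */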
5465         orig = data = RREG32(RLC_CNTL);
5466
5467         if (data & RLC_ENABLE) {
5468                 u32 i;
5469
5470                 data &= ~RLC_ENABLE;
5471                 WREG32(RLC_CNTL, data);
5472
5473                 for (i = 0; i < rdev->usec_timeout; i++) {
5474                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5475                                 break;
5476                         udelay(1);
5477                 }
5478
5479                 cik_wait_for_rlc_serdes(rdev);
5480         }
5481
5482         return orig;
5483 }
5484
5485 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5486 {
5487         u32 tmp, i, mask;
5488
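        /* ask the RLC to enter safe mode, then wait for the GFX power and
         * clock status bits and for the request bit to clear */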
5489         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5490         WREG32(RLC_GPR_REG2, tmp);
5491
5492         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5493         for (i = 0; i < rdev->usec_timeout; i++) {
5494                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5495                         break;
5496                 udelay(1);
5497         }
5498
5499         for (i = 0; i < rdev->usec_timeout; i++) {
5500                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5501                         break;
5502                 udelay(1);
5503         }
5504 }
5505
5506 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5507 {
5508         u32 tmp;
5509
5510         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5511         WREG32(RLC_GPR_REG2, tmp);
5512 }
5513
5514 /**
5515  * cik_rlc_stop - stop the RLC ME
5516  *
5517  * @rdev: radeon_device pointer
5518  *
5519  * Halt the RLC ME (MicroEngine) (CIK).
5520  */
5521 static void cik_rlc_stop(struct radeon_device *rdev)
5522 {
5523         WREG32(RLC_CNTL, 0);
5524
5525         cik_enable_gui_idle_interrupt(rdev, false);
5526
5527         cik_wait_for_rlc_serdes(rdev);
5528 }
5529
5530 /**
5531  * cik_rlc_start - start the RLC ME
5532  *
5533  * @rdev: radeon_device pointer
5534  *
5535  * Unhalt the RLC ME (MicroEngine) (CIK).
5536  */
5537 static void cik_rlc_start(struct radeon_device *rdev)
5538 {
5539         WREG32(RLC_CNTL, RLC_ENABLE);
5540
5541         cik_enable_gui_idle_interrupt(rdev, true);
5542
5543         udelay(50);
5544 }
5545
5546 /**
5547  * cik_rlc_resume - setup the RLC hw
5548  *
5549  * @rdev: radeon_device pointer
5550  *
5551  * Initialize the RLC registers, load the ucode,
5552  * and start the RLC (CIK).
5553  * Returns 0 for success, -EINVAL if the ucode is not available.
5554  */
5555 static int cik_rlc_resume(struct radeon_device *rdev)
5556 {
5557         u32 i, size, tmp;
5558         const __be32 *fw_data;
5559
5560         if (!rdev->rlc_fw)
5561                 return -EINVAL;
5562
5563         switch (rdev->family) {
5564         case CHIP_BONAIRE:
5565         case CHIP_HAWAII:
5566         default:
5567                 size = BONAIRE_RLC_UCODE_SIZE;
5568                 break;
5569         case CHIP_KAVERI:
5570                 size = KV_RLC_UCODE_SIZE;
5571                 break;
5572         case CHIP_KABINI:
5573                 size = KB_RLC_UCODE_SIZE;
5574                 break;
5575         }
5576
5577         cik_rlc_stop(rdev);
5578
5579         /* disable CG */
5580         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5581         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5582
5583         si_rlc_reset(rdev);
5584
5585         cik_init_pg(rdev);
5586
5587         cik_init_cg(rdev);
5588
5589         WREG32(RLC_LB_CNTR_INIT, 0);
5590         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5591
5592         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5593         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5594         WREG32(RLC_LB_PARAMS, 0x00600408);
5595         WREG32(RLC_LB_CNTL, 0x80000004);
5596
5597         WREG32(RLC_MC_CNTL, 0);
5598         WREG32(RLC_UCODE_CNTL, 0);
5599
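        /* load the RLC ucode one dword at a time through the ADDR/DATA
         * register pair, then reset the write address */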
5600         fw_data = (const __be32 *)rdev->rlc_fw->data;
5601         WREG32(RLC_GPM_UCODE_ADDR, 0);
5602         for (i = 0; i < size; i++)
5603                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5604         WREG32(RLC_GPM_UCODE_ADDR, 0);
5605
5606         /* XXX - find out what chips support lbpw */
5607         cik_enable_lbpw(rdev, false);
5608
5609         if (rdev->family == CHIP_BONAIRE)
5610                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5611
5612         cik_rlc_start(rdev);
5613
5614         return 0;
5615 }
5616
5617 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5618 {
5619         u32 data, orig, tmp, tmp2;
5620
5621         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5622
5623         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5624                 cik_enable_gui_idle_interrupt(rdev, true);
5625
5626                 tmp = cik_halt_rlc(rdev);
5627
5628                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5629                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5630                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5631                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5632                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5633
5634                 cik_update_rlc(rdev, tmp);
5635
5636                 data |= CGCG_EN | CGLS_EN;
5637         } else {
5638                 cik_enable_gui_idle_interrupt(rdev, false);
5639
5640                 RREG32(CB_CGTT_SCLK_CTRL);
5641                 RREG32(CB_CGTT_SCLK_CTRL);
5642                 RREG32(CB_CGTT_SCLK_CTRL);
5643                 RREG32(CB_CGTT_SCLK_CTRL);
5644
5645                 data &= ~(CGCG_EN | CGLS_EN);
5646         }
5647
5648         if (orig != data)
5649                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5650
5651 }
5652
5653 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5654 {
5655         u32 data, orig, tmp = 0;
5656
5657         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5658                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5659                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5660                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
5661                                 data |= CP_MEM_LS_EN;
5662                                 if (orig != data)
5663                                         WREG32(CP_MEM_SLP_CNTL, data);
5664                         }
5665                 }
5666
5667                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5668                 data &= 0xfffffffd;
5669                 if (orig != data)
5670                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5671
5672                 tmp = cik_halt_rlc(rdev);
5673
5674                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5675                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5676                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5677                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5678                 WREG32(RLC_SERDES_WR_CTRL, data);
5679
5680                 cik_update_rlc(rdev, tmp);
5681
5682                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5683                         orig = data = RREG32(CGTS_SM_CTRL_REG);
5684                         data &= ~SM_MODE_MASK;
5685                         data |= SM_MODE(0x2);
5686                         data |= SM_MODE_ENABLE;
5687                         data &= ~CGTS_OVERRIDE;
5688                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5689                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5690                                 data &= ~CGTS_LS_OVERRIDE;
5691                         data &= ~ON_MONITOR_ADD_MASK;
5692                         data |= ON_MONITOR_ADD_EN;
5693                         data |= ON_MONITOR_ADD(0x96);
5694                         if (orig != data)
5695                                 WREG32(CGTS_SM_CTRL_REG, data);
5696                 }
5697         } else {
5698                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5699                 data |= 0x00000002;
5700                 if (orig != data)
5701                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5702
5703                 data = RREG32(RLC_MEM_SLP_CNTL);
5704                 if (data & RLC_MEM_LS_EN) {
5705                         data &= ~RLC_MEM_LS_EN;
5706                         WREG32(RLC_MEM_SLP_CNTL, data);
5707                 }
5708
5709                 data = RREG32(CP_MEM_SLP_CNTL);
5710                 if (data & CP_MEM_LS_EN) {
5711                         data &= ~CP_MEM_LS_EN;
5712                         WREG32(CP_MEM_SLP_CNTL, data);
5713                 }
5714
5715                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5716                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5717                 if (orig != data)
5718                         WREG32(CGTS_SM_CTRL_REG, data);
5719
5720                 tmp = cik_halt_rlc(rdev);
5721
5722                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5723                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5724                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5725                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5726                 WREG32(RLC_SERDES_WR_CTRL, data);
5727
5728                 cik_update_rlc(rdev, tmp);
5729         }
5730 }
5731
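/* MC hub/client registers carrying the clockgating (CG) and light sleep (LS)
 * enable bits toggled by the two helpers below */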
5732 static const u32 mc_cg_registers[] =
5733 {
5734         MC_HUB_MISC_HUB_CG,
5735         MC_HUB_MISC_SIP_CG,
5736         MC_HUB_MISC_VM_CG,
5737         MC_XPB_CLK_GAT,
5738         ATC_MISC_CG,
5739         MC_CITF_MISC_WR_CG,
5740         MC_CITF_MISC_RD_CG,
5741         MC_CITF_MISC_VM_CG,
5742         VM_L2_CG,
5743 };
5744
5745 static void cik_enable_mc_ls(struct radeon_device *rdev,
5746                              bool enable)
5747 {
5748         int i;
5749         u32 orig, data;
5750
5751         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5752                 orig = data = RREG32(mc_cg_registers[i]);
5753                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5754                         data |= MC_LS_ENABLE;
5755                 else
5756                         data &= ~MC_LS_ENABLE;
5757                 if (data != orig)
5758                         WREG32(mc_cg_registers[i], data);
5759         }
5760 }
5761
5762 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5763                                bool enable)
5764 {
5765         int i;
5766         u32 orig, data;
5767
5768         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5769                 orig = data = RREG32(mc_cg_registers[i]);
5770                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5771                         data |= MC_CG_ENABLE;
5772                 else
5773                         data &= ~MC_CG_ENABLE;
5774                 if (data != orig)
5775                         WREG32(mc_cg_registers[i], data);
5776         }
5777 }
5778
5779 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5780                                  bool enable)
5781 {
5782         u32 orig, data;
5783
5784         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5785                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5786                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5787         } else {
5788                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5789                 data |= 0xff000000;
5790                 if (data != orig)
5791                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5792
5793                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5794                 data |= 0xff000000;
5795                 if (data != orig)
5796                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5797         }
5798 }
5799
5800 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5801                                  bool enable)
5802 {
5803         u32 orig, data;
5804
5805         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5806                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5807                 data |= 0x100;
5808                 if (orig != data)
5809                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5810
5811                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5812                 data |= 0x100;
5813                 if (orig != data)
5814                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5815         } else {
5816                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5817                 data &= ~0x100;
5818                 if (orig != data)
5819                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5820
5821                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5822                 data &= ~0x100;
5823                 if (orig != data)
5824                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5825         }
5826 }
5827
5828 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5829                                 bool enable)
5830 {
5831         u32 orig, data;
5832
5833         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5834                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5835                 data = 0xfff;
5836                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5837
5838                 orig = data = RREG32(UVD_CGC_CTRL);
5839                 data |= DCM;
5840                 if (orig != data)
5841                         WREG32(UVD_CGC_CTRL, data);
5842         } else {
5843                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5844                 data &= ~0xfff;
5845                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5846
5847                 orig = data = RREG32(UVD_CGC_CTRL);
5848                 data &= ~DCM;
5849                 if (orig != data)
5850                         WREG32(UVD_CGC_CTRL, data);
5851         }
5852 }
5853
5854 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5855                                bool enable)
5856 {
5857         u32 orig, data;
5858
5859         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5860
5861         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5862                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5863                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5864         else
5865                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5866                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5867
5868         if (orig != data)
5869                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5870 }
5871
5872 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5873                                 bool enable)
5874 {
5875         u32 orig, data;
5876
5877         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5878
5879         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5880                 data &= ~CLOCK_GATING_DIS;
5881         else
5882                 data |= CLOCK_GATING_DIS;
5883
5884         if (orig != data)
5885                 WREG32(HDP_HOST_PATH_CNTL, data);
5886 }
5887
5888 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5889                               bool enable)
5890 {
5891         u32 orig, data;
5892
5893         orig = data = RREG32(HDP_MEM_POWER_LS);
5894
5895         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5896                 data |= HDP_LS_ENABLE;
5897         else
5898                 data &= ~HDP_LS_ENABLE;
5899
5900         if (orig != data)
5901                 WREG32(HDP_MEM_POWER_LS, data);
5902 }
5903
5904 void cik_update_cg(struct radeon_device *rdev,
5905                    u32 block, bool enable)
5906 {
5907
5908         if (block & RADEON_CG_BLOCK_GFX) {
5909                 cik_enable_gui_idle_interrupt(rdev, false);
5910                 /* order matters! */
5911                 if (enable) {
5912                         cik_enable_mgcg(rdev, true);
5913                         cik_enable_cgcg(rdev, true);
5914                 } else {
5915                         cik_enable_cgcg(rdev, false);
5916                         cik_enable_mgcg(rdev, false);
5917                 }
5918                 cik_enable_gui_idle_interrupt(rdev, true);
5919         }
5920
5921         if (block & RADEON_CG_BLOCK_MC) {
5922                 if (!(rdev->flags & RADEON_IS_IGP)) {
5923                         cik_enable_mc_mgcg(rdev, enable);
5924                         cik_enable_mc_ls(rdev, enable);
5925                 }
5926         }
5927
5928         if (block & RADEON_CG_BLOCK_SDMA) {
5929                 cik_enable_sdma_mgcg(rdev, enable);
5930                 cik_enable_sdma_mgls(rdev, enable);
5931         }
5932
5933         if (block & RADEON_CG_BLOCK_BIF) {
5934                 cik_enable_bif_mgls(rdev, enable);
5935         }
5936
5937         if (block & RADEON_CG_BLOCK_UVD) {
5938                 if (rdev->has_uvd)
5939                         cik_enable_uvd_mgcg(rdev, enable);
5940         }
5941
5942         if (block & RADEON_CG_BLOCK_HDP) {
5943                 cik_enable_hdp_mgcg(rdev, enable);
5944                 cik_enable_hdp_ls(rdev, enable);
5945         }
5946 }
5947
5948 static void cik_init_cg(struct radeon_device *rdev)
5949 {
5950
5951         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5952
5953         if (rdev->has_uvd)
5954                 si_init_uvd_internal_cg(rdev);
5955
5956         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5957                              RADEON_CG_BLOCK_SDMA |
5958                              RADEON_CG_BLOCK_BIF |
5959                              RADEON_CG_BLOCK_UVD |
5960                              RADEON_CG_BLOCK_HDP), true);
5961 }
5962
5963 static void cik_fini_cg(struct radeon_device *rdev)
5964 {
5965         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5966                              RADEON_CG_BLOCK_SDMA |
5967                              RADEON_CG_BLOCK_BIF |
5968                              RADEON_CG_BLOCK_UVD |
5969                              RADEON_CG_BLOCK_HDP), false);
5970
5971         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5972 }
5973
5974 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5975                                           bool enable)
5976 {
5977         u32 data, orig;
5978
5979         orig = data = RREG32(RLC_PG_CNTL);
5980         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5981                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5982         else
5983                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5984         if (orig != data)
5985                 WREG32(RLC_PG_CNTL, data);
5986 }
5987
5988 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5989                                           bool enable)
5990 {
5991         u32 data, orig;
5992
5993         orig = data = RREG32(RLC_PG_CNTL);
5994         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5995                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5996         else
5997                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5998         if (orig != data)
5999                 WREG32(RLC_PG_CNTL, data);
6000 }
6001
6002 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6003 {
6004         u32 data, orig;
6005
6006         orig = data = RREG32(RLC_PG_CNTL);
6007         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6008                 data &= ~DISABLE_CP_PG;
6009         else
6010                 data |= DISABLE_CP_PG;
6011         if (orig != data)
6012                 WREG32(RLC_PG_CNTL, data);
6013 }
6014
6015 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6016 {
6017         u32 data, orig;
6018
6019         orig = data = RREG32(RLC_PG_CNTL);
6020         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6021                 data &= ~DISABLE_GDS_PG;
6022         else
6023                 data |= DISABLE_GDS_PG;
6024         if (orig != data)
6025                 WREG32(RLC_PG_CNTL, data);
6026 }
6027
6028 #define CP_ME_TABLE_SIZE    96
6029 #define CP_ME_TABLE_OFFSET  2048
6030 #define CP_MEC_TABLE_OFFSET 4096
6031
6032 void cik_init_cp_pg_table(struct radeon_device *rdev)
6033 {
6034         const __be32 *fw_data;
6035         volatile u32 *dst_ptr;
6036         int me, i, max_me = 4;
6037         u32 bo_offset = 0;
6038         u32 table_offset;
6039
6040         if (rdev->family == CHIP_KAVERI)
6041                 max_me = 5;
6042
6043         if (rdev->rlc.cp_table_ptr == NULL)
6044                 return;
6045
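        /* each CP ucode image (CE, PFP, ME and the MEC(s)) embeds a register
         * state table used by the RLC when power gating the CP; copy the
         * tables back to back into the cp_table buffer */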
6046         /* write the cp table buffer */
6047         dst_ptr = rdev->rlc.cp_table_ptr;
6048         for (me = 0; me < max_me; me++) {
6049                 if (me == 0) {
6050                         fw_data = (const __be32 *)rdev->ce_fw->data;
6051                         table_offset = CP_ME_TABLE_OFFSET;
6052                 } else if (me == 1) {
6053                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6054                         table_offset = CP_ME_TABLE_OFFSET;
6055                 } else if (me == 2) {
6056                         fw_data = (const __be32 *)rdev->me_fw->data;
6057                         table_offset = CP_ME_TABLE_OFFSET;
6058                 } else {
6059                         fw_data = (const __be32 *)rdev->mec_fw->data;
6060                         table_offset = CP_MEC_TABLE_OFFSET;
6061                 }
6062
6063                 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6064                         dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6065                 }
6066                 bo_offset += CP_ME_TABLE_SIZE;
6067         }
6068 }
6069
6070 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6071                                 bool enable)
6072 {
6073         u32 data, orig;
6074
6075         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6076                 orig = data = RREG32(RLC_PG_CNTL);
6077                 data |= GFX_PG_ENABLE;
6078                 if (orig != data)
6079                         WREG32(RLC_PG_CNTL, data);
6080
6081                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6082                 data |= AUTO_PG_EN;
6083                 if (orig != data)
6084                         WREG32(RLC_AUTO_PG_CTRL, data);
6085         } else {
6086                 orig = data = RREG32(RLC_PG_CNTL);
6087                 data &= ~GFX_PG_ENABLE;
6088                 if (orig != data)
6089                         WREG32(RLC_PG_CNTL, data);
6090
6091                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6092                 data &= ~AUTO_PG_EN;
6093                 if (orig != data)
6094                         WREG32(RLC_AUTO_PG_CTRL, data);
6095
6096                 data = RREG32(DB_RENDER_CONTROL);
6097         }
6098 }
6099
6100 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6101 {
6102         u32 mask = 0, tmp, tmp1;
6103         int i;
6104
6105         cik_select_se_sh(rdev, se, sh);
6106         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6107         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6108         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6109
6110         tmp &= 0xffff0000;
6111
6112         tmp |= tmp1;
6113         tmp >>= 16;
6114
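        /* tmp now holds the harvested/user-disabled CU bits for this SE/SH;
         * build a mask of max_cu_per_sh bits and return the active CUs */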
6115         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6116                 mask <<= 1;
6117                 mask |= 1;
6118         }
6119
6120         return (~tmp) & mask;
6121 }
6122
6123 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6124 {
6125         u32 i, j, k, active_cu_number = 0;
6126         u32 mask, counter, cu_bitmap;
6127         u32 tmp = 0;
6128
6129         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6130                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6131                         mask = 1;
6132                         cu_bitmap = 0;
6133                         counter = 0;
6134                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6135                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6136                                         if (counter < 2)
6137                                                 cu_bitmap |= mask;
6138                                         counter ++;
6139                                 }
6140                                 mask <<= 1;
6141                         }
6142
6143                         active_cu_number += counter;
6144                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6145                 }
6146         }
6147
6148         WREG32(RLC_PG_AO_CU_MASK, tmp);
6149
6150         tmp = RREG32(RLC_MAX_PG_CU);
6151         tmp &= ~MAX_PU_CU_MASK;
6152         tmp |= MAX_PU_CU(active_cu_number);
6153         WREG32(RLC_MAX_PG_CU, tmp);
6154 }
6155
6156 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6157                                        bool enable)
6158 {
6159         u32 data, orig;
6160
6161         orig = data = RREG32(RLC_PG_CNTL);
6162         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6163                 data |= STATIC_PER_CU_PG_ENABLE;
6164         else
6165                 data &= ~STATIC_PER_CU_PG_ENABLE;
6166         if (orig != data)
6167                 WREG32(RLC_PG_CNTL, data);
6168 }
6169
6170 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6171                                         bool enable)
6172 {
6173         u32 data, orig;
6174
6175         orig = data = RREG32(RLC_PG_CNTL);
6176         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6177                 data |= DYN_PER_CU_PG_ENABLE;
6178         else
6179                 data &= ~DYN_PER_CU_PG_ENABLE;
6180         if (orig != data)
6181                 WREG32(RLC_PG_CNTL, data);
6182 }
6183
6184 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6185 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6186
6187 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6188 {
6189         u32 data, orig;
6190         u32 i;
6191
6192         if (rdev->rlc.cs_data) {
6193                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6194                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6195                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6196                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6197         } else {
6198                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6199                 for (i = 0; i < 3; i++)
6200                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6201         }
6202         if (rdev->rlc.reg_list) {
6203                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6204                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6205                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6206         }
6207
6208         orig = data = RREG32(RLC_PG_CNTL);
6209         data |= GFX_PG_SRC;
6210         if (orig != data)
6211                 WREG32(RLC_PG_CNTL, data);
6212
6213         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6214         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6215
6216         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6217         data &= ~IDLE_POLL_COUNT_MASK;
6218         data |= IDLE_POLL_COUNT(0x60);
6219         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6220
6221         data = 0x10101010;
6222         WREG32(RLC_PG_DELAY, data);
6223
6224         data = RREG32(RLC_PG_DELAY_2);
6225         data &= ~0xff;
6226         data |= 0x3;
6227         WREG32(RLC_PG_DELAY_2, data);
6228
6229         data = RREG32(RLC_AUTO_PG_CTRL);
6230         data &= ~GRBM_REG_SGIT_MASK;
6231         data |= GRBM_REG_SGIT(0x700);
6232         WREG32(RLC_AUTO_PG_CTRL, data);
6233
6234 }
6235
6236 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6237 {
6238         cik_enable_gfx_cgpg(rdev, enable);
6239         cik_enable_gfx_static_mgpg(rdev, enable);
6240         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6241 }
6242
6243 u32 cik_get_csb_size(struct radeon_device *rdev)
6244 {
6245         u32 count = 0;
6246         const struct cs_section_def *sect = NULL;
6247         const struct cs_extent_def *ext = NULL;
6248
6249         if (rdev->rlc.cs_data == NULL)
6250                 return 0;
6251
6252         /* begin clear state */
6253         count += 2;
6254         /* context control state */
6255         count += 3;
6256
6257         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6258                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6259                         if (sect->id == SECT_CONTEXT)
6260                                 count += 2 + ext->reg_count;
6261                         else
6262                                 return 0;
6263                 }
6264         }
6265         /* pa_sc_raster_config/pa_sc_raster_config1 */
6266         count += 4;
6267         /* end clear state */
6268         count += 2;
6269         /* clear state */
6270         count += 2;
6271
6272         return count;
6273 }
6274
6275 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6276 {
6277         u32 count = 0, i;
6278         const struct cs_section_def *sect = NULL;
6279         const struct cs_extent_def *ext = NULL;
6280
6281         if (rdev->rlc.cs_data == NULL)
6282                 return;
6283         if (buffer == NULL)
6284                 return;
6285
6286         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6287         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6288
6289         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6290         buffer[count++] = cpu_to_le32(0x80000000);
6291         buffer[count++] = cpu_to_le32(0x80000000);
6292
6293         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6294                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6295                         if (sect->id == SECT_CONTEXT) {
6296                                 buffer[count++] =
6297                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6298                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6299                                 for (i = 0; i < ext->reg_count; i++)
6300                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6301                         } else {
6302                                 return;
6303                         }
6304                 }
6305         }
6306
6307         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6308         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6309         switch (rdev->family) {
6310         case CHIP_BONAIRE:
6311                 buffer[count++] = cpu_to_le32(0x16000012);
6312                 buffer[count++] = cpu_to_le32(0x00000000);
6313                 break;
6314         case CHIP_KAVERI:
6315                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6316                 buffer[count++] = cpu_to_le32(0x00000000);
6317                 break;
6318         case CHIP_KABINI:
6319                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6320                 buffer[count++] = cpu_to_le32(0x00000000);
6321                 break;
6322         case CHIP_HAWAII:
6323                 buffer[count++] = cpu_to_le32(0x3a00161a);
6324                 buffer[count++] = cpu_to_le32(0x0000002e);
6325                 break;
6326         default:
6327                 buffer[count++] = cpu_to_le32(0x00000000);
6328                 buffer[count++] = cpu_to_le32(0x00000000);
6329                 break;
6330         }
6331
6332         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6333         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6334
6335         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6336         buffer[count++] = cpu_to_le32(0);
6337 }
6338
6339 static void cik_init_pg(struct radeon_device *rdev)
6340 {
6341         if (rdev->pg_flags) {
6342                 cik_enable_sck_slowdown_on_pu(rdev, true);
6343                 cik_enable_sck_slowdown_on_pd(rdev, true);
6344                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6345                         cik_init_gfx_cgpg(rdev);
6346                         cik_enable_cp_pg(rdev, true);
6347                         cik_enable_gds_pg(rdev, true);
6348                 }
6349                 cik_init_ao_cu_mask(rdev);
6350                 cik_update_gfx_pg(rdev, true);
6351         }
6352 }
6353
6354 static void cik_fini_pg(struct radeon_device *rdev)
6355 {
6356         if (rdev->pg_flags) {
6357                 cik_update_gfx_pg(rdev, false);
6358                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6359                         cik_enable_cp_pg(rdev, false);
6360                         cik_enable_gds_pg(rdev, false);
6361                 }
6362         }
6363 }
6364
6365 /*
6366  * Interrupts
6367  * Starting with r6xx, interrupts are handled via a ring buffer.
6368  * Ring buffers are areas of GPU accessible memory that the GPU
6369  * writes interrupt vectors into and the host reads vectors out of.
6370  * There is a rptr (read pointer) that determines where the
6371  * host is currently reading, and a wptr (write pointer)
6372  * which determines where the GPU has written.  When the
6373  * pointers are equal, the ring is idle.  When the GPU
6374  * writes vectors to the ring buffer, it increments the
6375  * wptr.  When there is an interrupt, the host then starts
6376  * fetching commands and processing them until the pointers are
6377  * equal again at which point it updates the rptr.
6378  */
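
/*
 * Illustrative sketch only (not driver code): the host-side consumption
 * loop the comment above describes.  The helper names (read_hw_wptr(),
 * dispatch_vector(), write_hw_rptr()) and IH_PTR_MASK are hypothetical
 * stand-ins, not radeon APIs; the real loop lives in cik_irq_process().
 */
#if 0
static void ih_ring_consume_sketch(u32 rptr)
{
	u32 wptr = read_hw_wptr();             /* where the GPU stopped writing */

	while (rptr != wptr) {                 /* pointers equal == ring idle */
		dispatch_vector(rptr);         /* read and handle one IV entry */
		rptr = (rptr + 16) & IH_PTR_MASK;  /* entries are 16 bytes */
	}
	write_hw_rptr(rptr);                   /* tell the GPU how far we read */
}
#endif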
6379
6380 /**
6381  * cik_enable_interrupts - Enable the interrupt ring buffer
6382  *
6383  * @rdev: radeon_device pointer
6384  *
6385  * Enable the interrupt ring buffer (CIK).
6386  */
6387 static void cik_enable_interrupts(struct radeon_device *rdev)
6388 {
6389         u32 ih_cntl = RREG32(IH_CNTL);
6390         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6391
6392         ih_cntl |= ENABLE_INTR;
6393         ih_rb_cntl |= IH_RB_ENABLE;
6394         WREG32(IH_CNTL, ih_cntl);
6395         WREG32(IH_RB_CNTL, ih_rb_cntl);
6396         rdev->ih.enabled = true;
6397 }
6398
6399 /**
6400  * cik_disable_interrupts - Disable the interrupt ring buffer
6401  *
6402  * @rdev: radeon_device pointer
6403  *
6404  * Disable the interrupt ring buffer (CIK).
6405  */
6406 static void cik_disable_interrupts(struct radeon_device *rdev)
6407 {
6408         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6409         u32 ih_cntl = RREG32(IH_CNTL);
6410
6411         ih_rb_cntl &= ~IH_RB_ENABLE;
6412         ih_cntl &= ~ENABLE_INTR;
6413         WREG32(IH_RB_CNTL, ih_rb_cntl);
6414         WREG32(IH_CNTL, ih_cntl);
6415         /* set rptr, wptr to 0 */
6416         WREG32(IH_RB_RPTR, 0);
6417         WREG32(IH_RB_WPTR, 0);
6418         rdev->ih.enabled = false;
6419         rdev->ih.rptr = 0;
6420 }
6421
6422 /**
6423  * cik_disable_interrupt_state - Disable all interrupt sources
6424  *
6425  * @rdev: radeon_device pointer
6426  *
6427  * Clear all interrupt enable bits used by the driver (CIK).
6428  */
6429 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6430 {
6431         u32 tmp;
6432
6433         /* gfx ring */
6434         tmp = RREG32(CP_INT_CNTL_RING0) &
6435                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6436         WREG32(CP_INT_CNTL_RING0, tmp);
6437         /* sdma */
6438         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6439         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6440         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6441         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6442         /* compute queues */
6443         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6444         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6445         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6446         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6447         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6448         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6449         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6450         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6451         /* grbm */
6452         WREG32(GRBM_INT_CNTL, 0);
6453         /* vline/vblank, etc. */
6454         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6455         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6456         if (rdev->num_crtc >= 4) {
6457                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6458                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6459         }
6460         if (rdev->num_crtc >= 6) {
6461                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6462                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6463         }
6464
6465         /* dac hotplug */
6466         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6467
6468         /* digital hotplug */
6469         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6470         WREG32(DC_HPD1_INT_CONTROL, tmp);
6471         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6472         WREG32(DC_HPD2_INT_CONTROL, tmp);
6473         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6474         WREG32(DC_HPD3_INT_CONTROL, tmp);
6475         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6476         WREG32(DC_HPD4_INT_CONTROL, tmp);
6477         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6478         WREG32(DC_HPD5_INT_CONTROL, tmp);
6479         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6480         WREG32(DC_HPD6_INT_CONTROL, tmp);
6481
6482 }
6483
6484 /**
6485  * cik_irq_init - init and enable the interrupt ring
6486  *
6487  * @rdev: radeon_device pointer
6488  *
6489  * Allocate a ring buffer for the interrupt controller,
6490  * enable the RLC, disable interrupts, enable the IH
6491  * ring buffer and enable it (CIK).
6492  * Called at device load and resume.
6493  * Returns 0 for success, errors for failure.
6494  */
6495 static int cik_irq_init(struct radeon_device *rdev)
6496 {
6497         int ret = 0;
6498         int rb_bufsz;
6499         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6500
6501         /* allocate ring */
6502         ret = r600_ih_ring_alloc(rdev);
6503         if (ret)
6504                 return ret;
6505
6506         /* disable irqs */
6507         cik_disable_interrupts(rdev);
6508
6509         /* init rlc */
6510         ret = cik_rlc_resume(rdev);
6511         if (ret) {
6512                 r600_ih_ring_fini(rdev);
6513                 return ret;
6514         }
6515
6516         /* setup interrupt control */
6517         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6518         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6519         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6520         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6521          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6522          */
6523         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6524         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6525         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6526         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6527
6528         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6529         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6530
6531         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6532                       IH_WPTR_OVERFLOW_CLEAR |
6533                       (rb_bufsz << 1));
6534
6535         if (rdev->wb.enabled)
6536                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6537
6538         /* set the writeback address whether it's enabled or not */
6539         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6540         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6541
6542         WREG32(IH_RB_CNTL, ih_rb_cntl);
6543
6544         /* set rptr, wptr to 0 */
6545         WREG32(IH_RB_RPTR, 0);
6546         WREG32(IH_RB_WPTR, 0);
6547
6548         /* Default settings for IH_CNTL (disabled at first) */
6549         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6550         /* RPTR_REARM only works if msi's are enabled */
6551         if (rdev->msi_enabled)
6552                 ih_cntl |= RPTR_REARM;
6553         WREG32(IH_CNTL, ih_cntl);
6554
6555         /* force the active interrupt state to all disabled */
6556         cik_disable_interrupt_state(rdev);
6557
6558         pci_set_master(rdev->pdev);
6559
6560         /* enable irqs */
6561         cik_enable_interrupts(rdev);
6562
6563         return ret;
6564 }
6565
6566 /**
6567  * cik_irq_set - enable/disable interrupt sources
6568  *
6569  * @rdev: radeon_device pointer
6570  *
6571  * Enable interrupt sources on the GPU (vblanks, hpd,
6572  * etc.) (CIK).
6573  * Returns 0 for success, errors for failure.
6574  */
6575 int cik_irq_set(struct radeon_device *rdev)
6576 {
6577         u32 cp_int_cntl;
6578         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6579         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6580         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6581         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6582         u32 grbm_int_cntl = 0;
6583         u32 dma_cntl, dma_cntl1;
6584         u32 thermal_int;
6585
6586         if (!rdev->irq.installed) {
6587                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6588                 return -EINVAL;
6589         }
6590         /* don't enable anything if the ih is disabled */
6591         if (!rdev->ih.enabled) {
6592                 cik_disable_interrupts(rdev);
6593                 /* force the active interrupt state to all disabled */
6594                 cik_disable_interrupt_state(rdev);
6595                 return 0;
6596         }
6597
6598         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6599                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6600         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6601
6602         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6603         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6604         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6605         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6606         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6607         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6608
6609         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6610         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6611
6612         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6613         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6614         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6615         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6616         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6617         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6618         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6619         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6620
6621         if (rdev->flags & RADEON_IS_IGP)
6622                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6623                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6624         else
6625                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6626                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6627
6628         /* enable CP interrupts on all rings */
6629         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6630                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6631                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6632         }
6633         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6634                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6635                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
6636                 if (ring->me == 1) {
6637                         switch (ring->pipe) {
6638                         case 0:
6639                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6640                                 break;
6641                         case 1:
6642                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6643                                 break;
6644                         case 2:
6645                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6646                                 break;
6647                         case 3:
6648                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6649                                 break;
6650                         default:
6651                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6652                                 break;
6653                         }
6654                 } else if (ring->me == 2) {
6655                         switch (ring->pipe) {
6656                         case 0:
6657                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6658                                 break;
6659                         case 1:
6660                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6661                                 break;
6662                         case 2:
6663                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6664                                 break;
6665                         case 3:
6666                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6667                                 break;
6668                         default:
6669                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6670                                 break;
6671                         }
6672                 } else {
6673                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6674                 }
6675         }
6676         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6677                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6678                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6679                 if (ring->me == 1) {
6680                         switch (ring->pipe) {
6681                         case 0:
6682                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6683                                 break;
6684                         case 1:
6685                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6686                                 break;
6687                         case 2:
6688                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6689                                 break;
6690                         case 3:
6691                                 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6692                                 break;
6693                         default:
6694                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6695                                 break;
6696                         }
6697                 } else if (ring->me == 2) {
6698                         switch (ring->pipe) {
6699                         case 0:
6700                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6701                                 break;
6702                         case 1:
6703                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6704                                 break;
6705                         case 2:
6706                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6707                                 break;
6708                         case 3:
6709                                 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6710                                 break;
6711                         default:
6712                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6713                                 break;
6714                         }
6715                 } else {
6716                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6717                 }
6718         }
6719
6720         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6721                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6722                 dma_cntl |= TRAP_ENABLE;
6723         }
6724
6725         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6726                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6727                 dma_cntl1 |= TRAP_ENABLE;
6728         }
6729
6730         if (rdev->irq.crtc_vblank_int[0] ||
6731             atomic_read(&rdev->irq.pflip[0])) {
6732                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6733                 crtc1 |= VBLANK_INTERRUPT_MASK;
6734         }
6735         if (rdev->irq.crtc_vblank_int[1] ||
6736             atomic_read(&rdev->irq.pflip[1])) {
6737                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6738                 crtc2 |= VBLANK_INTERRUPT_MASK;
6739         }
6740         if (rdev->irq.crtc_vblank_int[2] ||
6741             atomic_read(&rdev->irq.pflip[2])) {
6742                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6743                 crtc3 |= VBLANK_INTERRUPT_MASK;
6744         }
6745         if (rdev->irq.crtc_vblank_int[3] ||
6746             atomic_read(&rdev->irq.pflip[3])) {
6747                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6748                 crtc4 |= VBLANK_INTERRUPT_MASK;
6749         }
6750         if (rdev->irq.crtc_vblank_int[4] ||
6751             atomic_read(&rdev->irq.pflip[4])) {
6752                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6753                 crtc5 |= VBLANK_INTERRUPT_MASK;
6754         }
6755         if (rdev->irq.crtc_vblank_int[5] ||
6756             atomic_read(&rdev->irq.pflip[5])) {
6757                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6758                 crtc6 |= VBLANK_INTERRUPT_MASK;
6759         }
6760         if (rdev->irq.hpd[0]) {
6761                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6762                 hpd1 |= DC_HPDx_INT_EN;
6763         }
6764         if (rdev->irq.hpd[1]) {
6765                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6766                 hpd2 |= DC_HPDx_INT_EN;
6767         }
6768         if (rdev->irq.hpd[2]) {
6769                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6770                 hpd3 |= DC_HPDx_INT_EN;
6771         }
6772         if (rdev->irq.hpd[3]) {
6773                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6774                 hpd4 |= DC_HPDx_INT_EN;
6775         }
6776         if (rdev->irq.hpd[4]) {
6777                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6778                 hpd5 |= DC_HPDx_INT_EN;
6779         }
6780         if (rdev->irq.hpd[5]) {
6781                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6782                 hpd6 |= DC_HPDx_INT_EN;
6783         }
6784
6785         if (rdev->irq.dpm_thermal) {
6786                 DRM_DEBUG("dpm thermal\n");
6787                 if (rdev->flags & RADEON_IS_IGP)
6788                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6789                 else
6790                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6791         }
6792
6793         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6794
6795         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6796         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6797
6798         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6799         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6800         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6801         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6802         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6803         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6804         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6805         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6806
6807         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6808
6809         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6810         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6811         if (rdev->num_crtc >= 4) {
6812                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6813                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6814         }
6815         if (rdev->num_crtc >= 6) {
6816                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6817                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6818         }
6819
6820         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6821         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6822         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6823         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6824         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6825         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6826
6827         if (rdev->flags & RADEON_IS_IGP)
6828                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6829         else
6830                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6831
6832         return 0;
6833 }
6834
6835 /**
6836  * cik_irq_ack - ack interrupt sources
6837  *
6838  * @rdev: radeon_device pointer
6839  *
6840  * Ack interrupt sources on the GPU (vblanks, hpd,
6841  * etc.) (CIK).  Certain interrupt sources are sw
6842  * generated and do not require an explicit ack.
6843  */
6844 static inline void cik_irq_ack(struct radeon_device *rdev)
6845 {
6846         u32 tmp;
6847
6848         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6849         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6850         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6851         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6852         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6853         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6854         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6855
6856         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6857                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6858         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6859                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6860         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6861                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6862         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6863                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6864
6865         if (rdev->num_crtc >= 4) {
6866                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6867                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6868                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6869                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6870                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6871                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6872                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6873                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6874         }
6875
6876         if (rdev->num_crtc >= 6) {
6877                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6878                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6879                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6880                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6881                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6882                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6883                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6884                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6885         }
6886
6887         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6888                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6889                 tmp |= DC_HPDx_INT_ACK;
6890                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6891         }
6892         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6893                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6894                 tmp |= DC_HPDx_INT_ACK;
6895                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6896         }
6897         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6898                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6899                 tmp |= DC_HPDx_INT_ACK;
6900                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6901         }
6902         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6903                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6904                 tmp |= DC_HPDx_INT_ACK;
6905                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6906         }
6907         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6908                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6909                 tmp |= DC_HPDx_INT_ACK;
6910                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6911         }
6912         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6913                 tmp = RREG32(DC_HPD6_INT_CONTROL);
6914                 tmp |= DC_HPDx_INT_ACK;
6915                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6916         }
6917 }
6918
6919 /**
6920  * cik_irq_disable - disable interrupts
6921  *
6922  * @rdev: radeon_device pointer
6923  *
6924  * Disable interrupts on the hw (CIK).
6925  */
6926 static void cik_irq_disable(struct radeon_device *rdev)
6927 {
6928         cik_disable_interrupts(rdev);
6929         /* Wait and acknowledge irq */
6930         mdelay(1);
6931         cik_irq_ack(rdev);
6932         cik_disable_interrupt_state(rdev);
6933 }
6934
6935 /**
6936  * cik_irq_suspend - disable interrupts for suspend
6937  *
6938  * @rdev: radeon_device pointer
6939  *
6940  * Disable interrupts and stop the RLC (CIK).
6941  * Used for suspend.
6942  */
6943 static void cik_irq_suspend(struct radeon_device *rdev)
6944 {
6945         cik_irq_disable(rdev);
6946         cik_rlc_stop(rdev);
6947 }
6948
6949 /**
6950  * cik_irq_fini - tear down interrupt support
6951  *
6952  * @rdev: radeon_device pointer
6953  *
6954  * Disable interrupts on the hw and free the IH ring
6955  * buffer (CIK).
6956  * Used for driver unload.
6957  */
6958 static void cik_irq_fini(struct radeon_device *rdev)
6959 {
6960         cik_irq_suspend(rdev);
6961         r600_ih_ring_fini(rdev);
6962 }
6963
6964 /**
6965  * cik_get_ih_wptr - get the IH ring buffer wptr
6966  *
6967  * @rdev: radeon_device pointer
6968  *
6969  * Get the IH ring buffer wptr from either the register
6970  * or the writeback memory buffer (CIK).  Also check for
6971  * ring buffer overflow and deal with it.
6972  * Used by cik_irq_process().
6973  * Returns the value of the wptr.
6974  */
6975 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6976 {
6977         u32 wptr, tmp;
6978
6979         if (rdev->wb.enabled)
6980                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6981         else
6982                 wptr = RREG32(IH_RB_WPTR);
6983
6984         if (wptr & RB_OVERFLOW) {
6985                 /* When a ring buffer overflow happens, start parsing interrupts
6986                  * from the last non-overwritten vector (wptr + 16).  Hopefully
6987                  * this should allow us to catch up.
6988                  */
6989                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6990                         wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6991                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6992                 tmp = RREG32(IH_RB_CNTL);
6993                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6994                 WREG32(IH_RB_CNTL, tmp);
6995         }
6996         return (wptr & rdev->ih.ptr_mask);
6997 }
6998
6999 /* CIK IV Ring
7000  * Each IV ring entry is 128 bits:
7001  * [7:0]    - interrupt source id
7002  * [31:8]   - reserved
7003  * [59:32]  - interrupt source data
7004  * [63:60]  - reserved
7005  * [71:64]  - RINGID
7006  *            CP:
7007  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7008  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7009  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7010  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7011  *            PIPE_ID - ME0 0=3D
7012  *                    - ME1&2 compute dispatcher (4 pipes each)
7013  *            SDMA:
7014  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7015  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7016  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7017  * [79:72]  - VMID
7018  * [95:80]  - PASID
7019  * [127:96] - reserved
7020  */
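
/*
 * Illustrative sketch only (not driver code): how one 128-bit IV entry
 * (four little-endian dwords) maps to the fields documented above.  The
 * masks mirror those used in cik_irq_process() below; the struct and
 * helper names are hypothetical.
 */
#if 0
struct cik_iv_sketch {
	u8  src_id;    /* [7:0]    interrupt source id */
	u32 src_data;  /* [59:32]  interrupt source data */
	u8  ring_id;   /* [71:64]  RINGID */
	u8  me_id;     /* CP: ME_ID[1:0]    = ring_id[6:5] */
	u8  pipe_id;   /* CP: PIPE_ID[1:0]  = ring_id[4:3] */
	u8  queue_id;  /* CP: QUEUE_ID[2:0] = ring_id[2:0] */
	u8  vmid;      /* [79:72]  VMID */
	u16 pasid;     /* [95:80]  PASID */
};

static void cik_decode_iv_sketch(const u32 *dw, struct cik_iv_sketch *iv)
{
	iv->src_id   = le32_to_cpu(dw[0]) & 0xff;
	iv->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	iv->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	iv->me_id    = (iv->ring_id & 0x60) >> 5;
	iv->pipe_id  = (iv->ring_id & 0x18) >> 3;
	iv->queue_id = (iv->ring_id & 0x7) >> 0;
	iv->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	iv->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}
#endif
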
7021 /**
7022  * cik_irq_process - interrupt handler
7023  *
7024  * @rdev: radeon_device pointer
7025  *
7026  * Interrupt handler (CIK).  Walk the IH ring,
7027  * ack interrupts and schedule work to handle
7028  * interrupt events.
7029  * Returns irq process return code.
7030  */
7031 int cik_irq_process(struct radeon_device *rdev)
7032 {
7033         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7034         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7035         u32 wptr;
7036         u32 rptr;
7037         u32 src_id, src_data, ring_id;
7038         u8 me_id, pipe_id, queue_id;
7039         u32 ring_index;
7040         bool queue_hotplug = false;
7041         bool queue_reset = false;
7042         u32 addr, status, mc_client;
7043         bool queue_thermal = false;
7044
7045         if (!rdev->ih.enabled || rdev->shutdown)
7046                 return IRQ_NONE;
7047
7048         wptr = cik_get_ih_wptr(rdev);
7049
7050 restart_ih:
7051         /* is somebody else already processing irqs? */
7052         if (atomic_xchg(&rdev->ih.lock, 1))
7053                 return IRQ_NONE;
7054
7055         rptr = rdev->ih.rptr;
7056         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7057
7058         /* Order reading of wptr vs. reading of IH ring data */
7059         rmb();
7060
7061         /* display interrupts */
7062         cik_irq_ack(rdev);
7063
7064         while (rptr != wptr) {
7065                 /* wptr/rptr are in bytes! */
7066                 ring_index = rptr / 4;
7067                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7068                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7069                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7070
7071                 switch (src_id) {
7072                 case 1: /* D1 vblank/vline */
7073                         switch (src_data) {
7074                         case 0: /* D1 vblank */
7075                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7076                                         if (rdev->irq.crtc_vblank_int[0]) {
7077                                                 drm_handle_vblank(rdev->ddev, 0);
7078                                                 rdev->pm.vblank_sync = true;
7079                                                 wake_up(&rdev->irq.vblank_queue);
7080                                         }
7081                                         if (atomic_read(&rdev->irq.pflip[0]))
7082                                                 radeon_crtc_handle_flip(rdev, 0);
7083                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7084                                         DRM_DEBUG("IH: D1 vblank\n");
7085                                 }
7086                                 break;
7087                         case 1: /* D1 vline */
7088                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7089                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7090                                         DRM_DEBUG("IH: D1 vline\n");
7091                                 }
7092                                 break;
7093                         default:
7094                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7095                                 break;
7096                         }
7097                         break;
7098                 case 2: /* D2 vblank/vline */
7099                         switch (src_data) {
7100                         case 0: /* D2 vblank */
7101                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7102                                         if (rdev->irq.crtc_vblank_int[1]) {
7103                                                 drm_handle_vblank(rdev->ddev, 1);
7104                                                 rdev->pm.vblank_sync = true;
7105                                                 wake_up(&rdev->irq.vblank_queue);
7106                                         }
7107                                         if (atomic_read(&rdev->irq.pflip[1]))
7108                                                 radeon_crtc_handle_flip(rdev, 1);
7109                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7110                                         DRM_DEBUG("IH: D2 vblank\n");
7111                                 }
7112                                 break;
7113                         case 1: /* D2 vline */
7114                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7115                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7116                                         DRM_DEBUG("IH: D2 vline\n");
7117                                 }
7118                                 break;
7119                         default:
7120                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7121                                 break;
7122                         }
7123                         break;
7124                 case 3: /* D3 vblank/vline */
7125                         switch (src_data) {
7126                         case 0: /* D3 vblank */
7127                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7128                                         if (rdev->irq.crtc_vblank_int[2]) {
7129                                                 drm_handle_vblank(rdev->ddev, 2);
7130                                                 rdev->pm.vblank_sync = true;
7131                                                 wake_up(&rdev->irq.vblank_queue);
7132                                         }
7133                                         if (atomic_read(&rdev->irq.pflip[2]))
7134                                                 radeon_crtc_handle_flip(rdev, 2);
7135                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7136                                         DRM_DEBUG("IH: D3 vblank\n");
7137                                 }
7138                                 break;
7139                         case 1: /* D3 vline */
7140                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7141                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7142                                         DRM_DEBUG("IH: D3 vline\n");
7143                                 }
7144                                 break;
7145                         default:
7146                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7147                                 break;
7148                         }
7149                         break;
7150                 case 4: /* D4 vblank/vline */
7151                         switch (src_data) {
7152                         case 0: /* D4 vblank */
7153                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7154                                         if (rdev->irq.crtc_vblank_int[3]) {
7155                                                 drm_handle_vblank(rdev->ddev, 3);
7156                                                 rdev->pm.vblank_sync = true;
7157                                                 wake_up(&rdev->irq.vblank_queue);
7158                                         }
7159                                         if (atomic_read(&rdev->irq.pflip[3]))
7160                                                 radeon_crtc_handle_flip(rdev, 3);
7161                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7162                                         DRM_DEBUG("IH: D4 vblank\n");
7163                                 }
7164                                 break;
7165                         case 1: /* D4 vline */
7166                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7167                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7168                                         DRM_DEBUG("IH: D4 vline\n");
7169                                 }
7170                                 break;
7171                         default:
7172                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7173                                 break;
7174                         }
7175                         break;
7176                 case 5: /* D5 vblank/vline */
7177                         switch (src_data) {
7178                         case 0: /* D5 vblank */
7179                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7180                                         if (rdev->irq.crtc_vblank_int[4]) {
7181                                                 drm_handle_vblank(rdev->ddev, 4);
7182                                                 rdev->pm.vblank_sync = true;
7183                                                 wake_up(&rdev->irq.vblank_queue);
7184                                         }
7185                                         if (atomic_read(&rdev->irq.pflip[4]))
7186                                                 radeon_crtc_handle_flip(rdev, 4);
7187                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7188                                         DRM_DEBUG("IH: D5 vblank\n");
7189                                 }
7190                                 break;
7191                         case 1: /* D5 vline */
7192                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7193                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7194                                         DRM_DEBUG("IH: D5 vline\n");
7195                                 }
7196                                 break;
7197                         default:
7198                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7199                                 break;
7200                         }
7201                         break;
7202                 case 6: /* D6 vblank/vline */
7203                         switch (src_data) {
7204                         case 0: /* D6 vblank */
7205                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7206                                         if (rdev->irq.crtc_vblank_int[5]) {
7207                                                 drm_handle_vblank(rdev->ddev, 5);
7208                                                 rdev->pm.vblank_sync = true;
7209                                                 wake_up(&rdev->irq.vblank_queue);
7210                                         }
7211                                         if (atomic_read(&rdev->irq.pflip[5]))
7212                                                 radeon_crtc_handle_flip(rdev, 5);
7213                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7214                                         DRM_DEBUG("IH: D6 vblank\n");
7215                                 }
7216                                 break;
7217                         case 1: /* D6 vline */
7218                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7219                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7220                                         DRM_DEBUG("IH: D6 vline\n");
7221                                 }
7222                                 break;
7223                         default:
7224                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7225                                 break;
7226                         }
7227                         break;
7228                 case 42: /* HPD hotplug */
7229                         switch (src_data) {
7230                         case 0:
7231                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7232                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7233                                         queue_hotplug = true;
7234                                         DRM_DEBUG("IH: HPD1\n");
7235                                 }
7236                                 break;
7237                         case 1:
7238                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7239                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7240                                         queue_hotplug = true;
7241                                         DRM_DEBUG("IH: HPD2\n");
7242                                 }
7243                                 break;
7244                         case 2:
7245                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7246                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7247                                         queue_hotplug = true;
7248                                         DRM_DEBUG("IH: HPD3\n");
7249                                 }
7250                                 break;
7251                         case 3:
7252                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7253                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7254                                         queue_hotplug = true;
7255                                         DRM_DEBUG("IH: HPD4\n");
7256                                 }
7257                                 break;
7258                         case 4:
7259                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7260                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7261                                         queue_hotplug = true;
7262                                         DRM_DEBUG("IH: HPD5\n");
7263                                 }
7264                                 break;
7265                         case 5:
7266                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7267                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7268                                         queue_hotplug = true;
7269                                         DRM_DEBUG("IH: HPD6\n");
7270                                 }
7271                                 break;
7272                         default:
7273                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7274                                 break;
7275                         }
7276                         break;
7277                 case 124: /* UVD */
7278                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7279                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7280                         break;
7281                 case 146:
7282                 case 147:
7283                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7284                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7285                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7286                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7287                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7288                                 addr);
7289                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7290                                 status);
7291                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7292                         /* reset addr and status */
7293                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7294                         break;
7295                 case 176: /* GFX RB CP_INT */
7296                 case 177: /* GFX IB CP_INT */
7297                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7298                         break;
7299                 case 181: /* CP EOP event */
7300                         DRM_DEBUG("IH: CP EOP\n");
7301                         /* XXX check the bitfield order! */
7302                         me_id = (ring_id & 0x60) >> 5;
7303                         pipe_id = (ring_id & 0x18) >> 3;
7304                         queue_id = (ring_id & 0x7) >> 0;
7305                         switch (me_id) {
7306                         case 0:
7307                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7308                                 break;
7309                         case 1:
7310                         case 2:
7311                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7312                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7313                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7314                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7315                                 break;
7316                         }
7317                         break;
7318                 case 184: /* CP Privileged reg access */
7319                         DRM_ERROR("Illegal register access in command stream\n");
7320                         /* XXX check the bitfield order! */
7321                         me_id = (ring_id & 0x60) >> 5;
7322                         pipe_id = (ring_id & 0x18) >> 3;
7323                         queue_id = (ring_id & 0x7) >> 0;
7324                         switch (me_id) {
7325                         case 0:
7326                                 /* This results in a full GPU reset, but all we need to do is soft
7327                                  * reset the CP for gfx
7328                                  */
7329                                 queue_reset = true;
7330                                 break;
7331                         case 1:
7332                                 /* XXX compute */
7333                                 queue_reset = true;
7334                                 break;
7335                         case 2:
7336                                 /* XXX compute */
7337                                 queue_reset = true;
7338                                 break;
7339                         }
7340                         break;
7341                 case 185: /* CP Privileged inst */
7342                         DRM_ERROR("Illegal instruction in command stream\n");
7343                         /* XXX check the bitfield order! */
7344                         me_id = (ring_id & 0x60) >> 5;
7345                         pipe_id = (ring_id & 0x18) >> 3;
7346                         queue_id = (ring_id & 0x7) >> 0;
7347                         switch (me_id) {
7348                         case 0:
7349                                 /* This results in a full GPU reset, but all we need to do is soft
7350                                  * reset the CP for gfx
7351                                  */
7352                                 queue_reset = true;
7353                                 break;
7354                         case 1:
7355                                 /* XXX compute */
7356                                 queue_reset = true;
7357                                 break;
7358                         case 2:
7359                                 /* XXX compute */
7360                                 queue_reset = true;
7361                                 break;
7362                         }
7363                         break;
7364                 case 224: /* SDMA trap event */
7365                         /* XXX check the bitfield order! */
7366                         me_id = (ring_id & 0x3) >> 0;
7367                         queue_id = (ring_id & 0xc) >> 2;
7368                         DRM_DEBUG("IH: SDMA trap\n");
7369                         switch (me_id) {
7370                         case 0:
7371                                 switch (queue_id) {
7372                                 case 0:
7373                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7374                                         break;
7375                                 case 1:
7376                                         /* XXX compute */
7377                                         break;
7378                                 case 2:
7379                                         /* XXX compute */
7380                                         break;
7381                                 }
7382                                 break;
7383                         case 1:
7384                                 switch (queue_id) {
7385                                 case 0:
7386                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7387                                         break;
7388                                 case 1:
7389                                         /* XXX compute */
7390                                         break;
7391                                 case 2:
7392                                         /* XXX compute */
7393                                         break;
7394                                 }
7395                                 break;
7396                         }
7397                         break;
7398                 case 230: /* thermal low to high */
7399                         DRM_DEBUG("IH: thermal low to high\n");
7400                         rdev->pm.dpm.thermal.high_to_low = false;
7401                         queue_thermal = true;
7402                         break;
7403                 case 231: /* thermal high to low */
7404                         DRM_DEBUG("IH: thermal high to low\n");
7405                         rdev->pm.dpm.thermal.high_to_low = true;
7406                         queue_thermal = true;
7407                         break;
7408                 case 233: /* GUI IDLE */
7409                         DRM_DEBUG("IH: GUI idle\n");
7410                         break;
7411                 case 241: /* SDMA Privileged inst */
7412                 case 247: /* SDMA Privileged inst */
7413                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7414                         /* XXX check the bitfield order! */
7415                         me_id = (ring_id & 0x3) >> 0;
7416                         queue_id = (ring_id & 0xc) >> 2;
7417                         switch (me_id) {
7418                         case 0:
7419                                 switch (queue_id) {
7420                                 case 0:
7421                                         queue_reset = true;
7422                                         break;
7423                                 case 1:
7424                                         /* XXX compute */
7425                                         queue_reset = true;
7426                                         break;
7427                                 case 2:
7428                                         /* XXX compute */
7429                                         queue_reset = true;
7430                                         break;
7431                                 }
7432                                 break;
7433                         case 1:
7434                                 switch (queue_id) {
7435                                 case 0:
7436                                         queue_reset = true;
7437                                         break;
7438                                 case 1:
7439                                         /* XXX compute */
7440                                         queue_reset = true;
7441                                         break;
7442                                 case 2:
7443                                         /* XXX compute */
7444                                         queue_reset = true;
7445                                         break;
7446                                 }
7447                                 break;
7448                         }
7449                         break;
7450                 default:
7451                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7452                         break;
7453                 }
7454
7455                 /* wptr/rptr are in bytes! */
7456                 rptr += 16;
7457                 rptr &= rdev->ih.ptr_mask;
7458         }
7459         if (queue_hotplug)
7460                 schedule_work(&rdev->hotplug_work);
7461         if (queue_reset)
7462                 schedule_work(&rdev->reset_work);
7463         if (queue_thermal)
7464                 schedule_work(&rdev->pm.dpm.thermal.work);
7465         rdev->ih.rptr = rptr;
7466         WREG32(IH_RB_RPTR, rdev->ih.rptr);
7467         atomic_set(&rdev->ih.lock, 0);
7468
7469         /* make sure wptr hasn't changed while processing */
7470         wptr = cik_get_ih_wptr(rdev);
7471         if (wptr != rptr)
7472                 goto restart_ih;
7473
7474         return IRQ_HANDLED;
7475 }
7476
7477 /*
7478  * startup/shutdown callbacks
7479  */
7480 /**
7481  * cik_startup - program the asic to a functional state
7482  *
7483  * @rdev: radeon_device pointer
7484  *
7485  * Programs the asic to a functional state (CIK).
7486  * Called by cik_init() and cik_resume().
7487  * Returns 0 for success, error for failure.
7488  */
7489 static int cik_startup(struct radeon_device *rdev)
7490 {
7491         struct radeon_ring *ring;
7492         int r;
7493
7494         /* enable pcie gen2/3 link */
7495         cik_pcie_gen3_enable(rdev);
7496         /* enable aspm */
7497         cik_program_aspm(rdev);
7498
7499         /* scratch needs to be initialized before MC */
7500         r = r600_vram_scratch_init(rdev);
7501         if (r)
7502                 return r;
7503
7504         cik_mc_program(rdev);
7505
7506         if (rdev->flags & RADEON_IS_IGP) {
7507                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7508                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7509                         r = cik_init_microcode(rdev);
7510                         if (r) {
7511                                 DRM_ERROR("Failed to load firmware!\n");
7512                                 return r;
7513                         }
7514                 }
7515         } else {
7516                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7517                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7518                     !rdev->mc_fw) {
7519                         r = cik_init_microcode(rdev);
7520                         if (r) {
7521                                 DRM_ERROR("Failed to load firmware!\n");
7522                                 return r;
7523                         }
7524                 }
7525
7526                 r = ci_mc_load_microcode(rdev);
7527                 if (r) {
7528                         DRM_ERROR("Failed to load MC firmware!\n");
7529                         return r;
7530                 }
7531         }
7532
7533         r = cik_pcie_gart_enable(rdev);
7534         if (r)
7535                 return r;
7536         cik_gpu_init(rdev);
7537
7538         /* allocate rlc buffers */
7539         if (rdev->flags & RADEON_IS_IGP) {
7540                 if (rdev->family == CHIP_KAVERI) {
7541                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7542                         rdev->rlc.reg_list_size =
7543                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7544                 } else {
7545                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7546                         rdev->rlc.reg_list_size =
7547                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7548                 }
7549         }
7550         rdev->rlc.cs_data = ci_cs_data;
7551         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7552         r = sumo_rlc_init(rdev);
7553         if (r) {
7554                 DRM_ERROR("Failed to init rlc BOs!\n");
7555                 return r;
7556         }
7557
7558         /* allocate wb buffer */
7559         r = radeon_wb_init(rdev);
7560         if (r)
7561                 return r;
7562
7563         /* allocate mec buffers */
7564         r = cik_mec_init(rdev);
7565         if (r) {
7566                 DRM_ERROR("Failed to init MEC BOs!\n");
7567                 return r;
7568         }
7569
7570         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7571         if (r) {
7572                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7573                 return r;
7574         }
7575
7576         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7577         if (r) {
7578                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7579                 return r;
7580         }
7581
7582         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7583         if (r) {
7584                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7585                 return r;
7586         }
7587
7588         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7589         if (r) {
7590                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7591                 return r;
7592         }
7593
7594         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7595         if (r) {
7596                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7597                 return r;
7598         }
7599
7600         r = radeon_uvd_resume(rdev);
7601         if (!r) {
7602                 r = uvd_v4_2_resume(rdev);
7603                 if (!r) {
7604                         r = radeon_fence_driver_start_ring(rdev,
7605                                                            R600_RING_TYPE_UVD_INDEX);
7606                         if (r)
7607                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7608                 }
7609         }
7610         if (r)
7611                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7612
7613         /* Enable IRQ */
7614         if (!rdev->irq.installed) {
7615                 r = radeon_irq_kms_init(rdev);
7616                 if (r)
7617                         return r;
7618         }
7619
7620         r = cik_irq_init(rdev);
7621         if (r) {
7622                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7623                 radeon_irq_kms_fini(rdev);
7624                 return r;
7625         }
7626         cik_irq_set(rdev);
7627
7628         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7629         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7630                              CP_RB0_RPTR, CP_RB0_WPTR,
7631                              PACKET3(PACKET3_NOP, 0x3FFF));
7632         if (r)
7633                 return r;
7634
7635         /* set up the compute queues */
7636         /* type-2 packets are deprecated on MEC, use type-3 instead */
7637         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7638         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7639                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7640                              PACKET3(PACKET3_NOP, 0x3FFF));
7641         if (r)
7642                 return r;
7643         ring->me = 1; /* first MEC */
7644         ring->pipe = 0; /* first pipe */
7645         ring->queue = 0; /* first queue */
7646         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7647
7648         /* type-2 packets are deprecated on MEC, use type-3 instead */
7649         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7650         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7651                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7652                              PACKET3(PACKET3_NOP, 0x3FFF));
7653         if (r)
7654                 return r;
7655         /* dGPUs only have 1 MEC */
7656         ring->me = 1; /* first MEC */
7657         ring->pipe = 0; /* first pipe */
7658         ring->queue = 1; /* second queue */
7659         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7660
7661         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7662         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7663                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7664                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7665                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7666         if (r)
7667                 return r;
7668
7669         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7670         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7671                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7672                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7673                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7674         if (r)
7675                 return r;
7676
7677         r = cik_cp_resume(rdev);
7678         if (r)
7679                 return r;
7680
7681         r = cik_sdma_resume(rdev);
7682         if (r)
7683                 return r;
7684
7685         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7686         if (ring->ring_size) {
7687                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7688                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7689                                      RADEON_CP_PACKET2);
7690                 if (!r)
7691                         r = uvd_v1_0_init(rdev);
7692                 if (r)
7693                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7694         }
7695
7696         r = radeon_ib_pool_init(rdev);
7697         if (r) {
7698                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7699                 return r;
7700         }
7701
7702         r = radeon_vm_manager_init(rdev);
7703         if (r) {
7704                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7705                 return r;
7706         }
7707
7708         r = dce6_audio_init(rdev);
7709         if (r)
7710                 return r;
7711
7712         return 0;
7713 }
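
/* Summary of the bring-up order implemented by cik_startup() above:
 * PCIe gen2/3 + ASPM, VRAM scratch, MC programming, microcode load,
 * GART enable, core GPU init, RLC/WB/MEC buffers, fence rings, IRQs,
 * ring initialization, CP/SDMA/UVD resume, then IB pool, VM manager
 * and audio.  cik_suspend()/cik_fini() below tear things down in
 * roughly the reverse order.
 */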
7714
7715 /**
7716  * cik_resume - resume the asic to a functional state
7717  *
7718  * @rdev: radeon_device pointer
7719  *
7720  * Programs the asic to a functional state (CIK).
7721  * Called at resume.
7722  * Returns 0 for success, error for failure.
7723  */
7724 int cik_resume(struct radeon_device *rdev)
7725 {
7726         int r;
7727
7728         /* post card */
7729         atom_asic_init(rdev->mode_info.atom_context);
7730
7731         /* init golden registers */
7732         cik_init_golden_registers(rdev);
7733
7734         rdev->accel_working = true;
7735         r = cik_startup(rdev);
7736         if (r) {
7737                 DRM_ERROR("cik startup failed on resume\n");
7738                 rdev->accel_working = false;
7739                 return r;
7740         }
7741
7742         return r;
7743
7744 }
7745
7746 /**
7747  * cik_suspend - suspend the asic
7748  *
7749  * @rdev: radeon_device pointer
7750  *
7751  * Bring the chip into a state suitable for suspend (CIK).
7752  * Called at suspend.
7753  * Returns 0 for success.
7754  */
7755 int cik_suspend(struct radeon_device *rdev)
7756 {
7757         dce6_audio_fini(rdev);
7758         radeon_vm_manager_fini(rdev);
7759         cik_cp_enable(rdev, false);
7760         cik_sdma_enable(rdev, false);
7761         uvd_v1_0_fini(rdev);
7762         radeon_uvd_suspend(rdev);
7763         cik_fini_pg(rdev);
7764         cik_fini_cg(rdev);
7765         cik_irq_suspend(rdev);
7766         radeon_wb_disable(rdev);
7767         cik_pcie_gart_disable(rdev);
7768         return 0;
7769 }
7770
7771 /* The plan is to move initialization into this function and to use
7772  * helper functions so that radeon_device_init does little more than
7773  * call asic specific functions. This should also allow us to remove
7774  * a bunch of callback functions
7775  * like vram_info.
7776  */
7777 /**
7778  * cik_init - asic specific driver and hw init
7779  *
7780  * @rdev: radeon_device pointer
7781  *
7782  * Set up asic specific driver variables and program the hw
7783  * to a functional state (CIK).
7784  * Called at driver startup.
7785  * Returns 0 for success, errors for failure.
7786  */
7787 int cik_init(struct radeon_device *rdev)
7788 {
7789         struct radeon_ring *ring;
7790         int r;
7791
7792         /* Read BIOS */
7793         if (!radeon_get_bios(rdev)) {
7794                 if (ASIC_IS_AVIVO(rdev))
7795                         return -EINVAL;
7796         }
7797         /* Must be an ATOMBIOS */
7798         if (!rdev->is_atom_bios) {
7799                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7800                 return -EINVAL;
7801         }
7802         r = radeon_atombios_init(rdev);
7803         if (r)
7804                 return r;
7805
7806         /* Post card if necessary */
7807         if (!radeon_card_posted(rdev)) {
7808                 if (!rdev->bios) {
7809                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7810                         return -EINVAL;
7811                 }
7812                 DRM_INFO("GPU not posted. posting now...\n");
7813                 atom_asic_init(rdev->mode_info.atom_context);
7814         }
7815         /* init golden registers */
7816         cik_init_golden_registers(rdev);
7817         /* Initialize scratch registers */
7818         cik_scratch_init(rdev);
7819         /* Initialize surface registers */
7820         radeon_surface_init(rdev);
7821         /* Initialize clocks */
7822         radeon_get_clock_info(rdev->ddev);
7823
7824         /* Fence driver */
7825         r = radeon_fence_driver_init(rdev);
7826         if (r)
7827                 return r;
7828
7829         /* initialize memory controller */
7830         r = cik_mc_init(rdev);
7831         if (r)
7832                 return r;
7833         /* Memory manager */
7834         r = radeon_bo_init(rdev);
7835         if (r)
7836                 return r;
7837
7838         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7839         ring->ring_obj = NULL;
7840         r600_ring_init(rdev, ring, 1024 * 1024);
7841
7842         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7843         ring->ring_obj = NULL;
7844         r600_ring_init(rdev, ring, 1024 * 1024);
7845         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7846         if (r)
7847                 return r;
7848
7849         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7850         ring->ring_obj = NULL;
7851         r600_ring_init(rdev, ring, 1024 * 1024);
7852         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7853         if (r)
7854                 return r;
7855
7856         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7857         ring->ring_obj = NULL;
7858         r600_ring_init(rdev, ring, 256 * 1024);
7859
7860         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7861         ring->ring_obj = NULL;
7862         r600_ring_init(rdev, ring, 256 * 1024);
7863
7864         r = radeon_uvd_init(rdev);
7865         if (!r) {
7866                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7867                 ring->ring_obj = NULL;
7868                 r600_ring_init(rdev, ring, 4096);
7869         }
7870
7871         rdev->ih.ring_obj = NULL;
7872         r600_ih_ring_init(rdev, 64 * 1024);
7873
7874         r = r600_pcie_gart_init(rdev);
7875         if (r)
7876                 return r;
7877
7878         rdev->accel_working = true;
7879         r = cik_startup(rdev);
7880         if (r) {
7881                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7882                 cik_cp_fini(rdev);
7883                 cik_sdma_fini(rdev);
7884                 cik_irq_fini(rdev);
7885                 sumo_rlc_fini(rdev);
7886                 cik_mec_fini(rdev);
7887                 radeon_wb_fini(rdev);
7888                 radeon_ib_pool_fini(rdev);
7889                 radeon_vm_manager_fini(rdev);
7890                 radeon_irq_kms_fini(rdev);
7891                 cik_pcie_gart_fini(rdev);
7892                 rdev->accel_working = false;
7893         }
7894
7895         /* Don't start up if the MC ucode is missing.
7896          * The default clocks and voltages before the MC ucode
7897          * is loaded are not sufficient for advanced operations.
7898          */
7899         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7900                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7901                 return -EINVAL;
7902         }
7903
7904         return 0;
7905 }
7906
7907 /**
7908  * cik_fini - asic specific driver and hw fini
7909  *
7910  * @rdev: radeon_device pointer
7911  *
7912  * Tear down the asic specific driver variables and program the hw
7913  * to an idle state (CIK).
7914  * Called at driver unload.
7915  */
7916 void cik_fini(struct radeon_device *rdev)
7917 {
7918         cik_cp_fini(rdev);
7919         cik_sdma_fini(rdev);
7920         cik_fini_pg(rdev);
7921         cik_fini_cg(rdev);
7922         cik_irq_fini(rdev);
7923         sumo_rlc_fini(rdev);
7924         cik_mec_fini(rdev);
7925         radeon_wb_fini(rdev);
7926         radeon_vm_manager_fini(rdev);
7927         radeon_ib_pool_fini(rdev);
7928         radeon_irq_kms_fini(rdev);
7929         uvd_v1_0_fini(rdev);
7930         radeon_uvd_fini(rdev);
7931         cik_pcie_gart_fini(rdev);
7932         r600_vram_scratch_fini(rdev);
7933         radeon_gem_fini(rdev);
7934         radeon_fence_driver_fini(rdev);
7935         radeon_bo_fini(rdev);
7936         radeon_atombios_fini(rdev);
7937         kfree(rdev->bios);
7938         rdev->bios = NULL;
7939 }
7940
7941 void dce8_program_fmt(struct drm_encoder *encoder)
7942 {
7943         struct drm_device *dev = encoder->dev;
7944         struct radeon_device *rdev = dev->dev_private;
7945         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7946         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7947         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7948         int bpc = 0;
7949         u32 tmp = 0;
7950         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7951
7952         if (connector) {
7953                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7954                 bpc = radeon_get_monitor_bpc(connector);
7955                 dither = radeon_connector->dither;
7956         }
7957
7958         /* LVDS/eDP FMT is set up by atom */
7959         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7960                 return;
7961
7962         /* not needed for analog */
7963         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7964             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7965                 return;
7966
7967         if (bpc == 0)
7968                 return;
7969
7970         switch (bpc) {
7971         case 6:
7972                 if (dither == RADEON_FMT_DITHER_ENABLE)
7973                         /* XXX sort out optimal dither settings */
7974                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7975                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7976                 else
7977                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7978                 break;
7979         case 8:
7980                 if (dither == RADEON_FMT_DITHER_ENABLE)
7981                         /* XXX sort out optimal dither settings */
7982                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7983                                 FMT_RGB_RANDOM_ENABLE |
7984                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7985                 else
7986                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7987                 break;
7988         case 10:
7989                 if (dither == RADEON_FMT_DITHER_ENABLE)
7990                         /* XXX sort out optimal dither settings */
7991                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7992                                 FMT_RGB_RANDOM_ENABLE |
7993                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7994                 else
7995                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7996                 break;
7997         default:
7998                 /* not needed */
7999                 break;
8000         }
8001
8002         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8003 }
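
/* Note: in the switch above, the FMT_SPATIAL_DITHER_DEPTH()/FMT_TRUNCATE_DEPTH()
 * arguments 0, 1 and 2 correspond to 6, 8 and 10 bits per color component
 * respectively; LVDS/eDP and analog outputs are skipped entirely.
 */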
8004
8005 /* display watermark setup */
8006 /**
8007  * dce8_line_buffer_adjust - Set up the line buffer
8008  *
8009  * @rdev: radeon_device pointer
8010  * @radeon_crtc: the selected display controller
8011  * @mode: the current display mode on the selected display
8012  * controller
8013  *
8014  * Set up the line buffer allocation for
8015  * the selected display controller (CIK).
8016  * Returns the line buffer size in pixels.
8017  */
8018 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8019                                    struct radeon_crtc *radeon_crtc,
8020                                    struct drm_display_mode *mode)
8021 {
8022         u32 tmp, buffer_alloc, i;
8023         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8024         /*
8025          * Line Buffer Setup
8026          * There are 6 line buffers, one for each display controller.
8027          * There are 3 partitions per LB. Select the number of partitions
8028          * to enable based on the display width.  For display widths larger
8029          * than 4096, you need to use 2 display controllers and combine
8030          * them using the stereo blender.
8031          */
8032         if (radeon_crtc->base.enabled && mode) {
8033                 if (mode->crtc_hdisplay < 1920) {
8034                         tmp = 1;
8035                         buffer_alloc = 2;
8036                 } else if (mode->crtc_hdisplay < 2560) {
8037                         tmp = 2;
8038                         buffer_alloc = 2;
8039                 } else if (mode->crtc_hdisplay < 4096) {
8040                         tmp = 0;
8041                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8042                 } else {
8043                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8044                         tmp = 0;
8045                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8046                 }
8047         } else {
8048                 tmp = 1;
8049                 buffer_alloc = 0;
8050         }
8051
8052         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8053                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8054
8055         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8056                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8057         for (i = 0; i < rdev->usec_timeout; i++) {
8058                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8059                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8060                         break;
8061                 udelay(1);
8062         }
8063
8064         if (radeon_crtc->base.enabled && mode) {
8065                 switch (tmp) {
8066                 case 0:
8067                 default:
8068                         return 4096 * 2;
8069                 case 1:
8070                         return 1920 * 2;
8071                 case 2:
8072                         return 2560 * 2;
8073                 }
8074         }
8075
8076         /* controller not enabled, so no lb used */
8077         return 0;
8078 }
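
/* Illustrative example (derived from the branches above): a 1920 pixel
 * wide mode falls into the "< 2560" bucket, so tmp = 2 and buffer_alloc = 2,
 * and the function reports 2560 * 2 = 5120 pixels of line buffer for that
 * controller.
 */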
8079
8080 /**
8081  * cik_get_number_of_dram_channels - get the number of dram channels
8082  *
8083  * @rdev: radeon_device pointer
8084  *
8085  * Look up the number of video ram channels (CIK).
8086  * Used for display watermark bandwidth calculations
8087  * Returns the number of dram channels
8088  */
8089 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8090 {
8091         u32 tmp = RREG32(MC_SHARED_CHMAP);
8092
8093         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8094         case 0:
8095         default:
8096                 return 1;
8097         case 1:
8098                 return 2;
8099         case 2:
8100                 return 4;
8101         case 3:
8102                 return 8;
8103         case 4:
8104                 return 3;
8105         case 5:
8106                 return 6;
8107         case 6:
8108                 return 10;
8109         case 7:
8110                 return 12;
8111         case 8:
8112                 return 16;
8113         }
8114 }
8115
8116 struct dce8_wm_params {
8117         u32 dram_channels; /* number of dram channels */
8118         u32 yclk;          /* bandwidth per dram data pin in kHz */
8119         u32 sclk;          /* engine clock in kHz */
8120         u32 disp_clk;      /* display clock in kHz */
8121         u32 src_width;     /* viewport width */
8122         u32 active_time;   /* active display time in ns */
8123         u32 blank_time;    /* blank time in ns */
8124         bool interlaced;    /* mode is interlaced */
8125         fixed20_12 vsc;    /* vertical scale ratio */
8126         u32 num_heads;     /* number of active crtcs */
8127         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8128         u32 lb_size;       /* line buffer allocated to pipe */
8129         u32 vtaps;         /* vertical scaler taps */
8130 };
8131
8132 /**
8133  * dce8_dram_bandwidth - get the dram bandwidth
8134  *
8135  * @wm: watermark calculation data
8136  *
8137  * Calculate the raw dram bandwidth (CIK).
8138  * Used for display watermark bandwidth calculations
8139  * Returns the dram bandwidth in MBytes/s
8140  */
8141 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8142 {
8143         /* Calculate raw DRAM Bandwidth */
8144         fixed20_12 dram_efficiency; /* 0.7 */
8145         fixed20_12 yclk, dram_channels, bandwidth;
8146         fixed20_12 a;
8147
8148         a.full = dfixed_const(1000);
8149         yclk.full = dfixed_const(wm->yclk);
8150         yclk.full = dfixed_div(yclk, a);
8151         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8152         a.full = dfixed_const(10);
8153         dram_efficiency.full = dfixed_const(7);
8154         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8155         bandwidth.full = dfixed_mul(dram_channels, yclk);
8156         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8157
8158         return dfixed_trunc(bandwidth);
8159 }
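
/* Worked example (illustrative numbers only): with wm->dram_channels = 2
 * and wm->yclk = 1000000 (kHz), the fixed point math above evaluates to
 * (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s of raw DRAM bandwidth.
 */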
8160
8161 /**
8162  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8163  *
8164  * @wm: watermark calculation data
8165  *
8166  * Calculate the dram bandwidth used for display (CIK).
8167  * Used for display watermark bandwidth calculations
8168  * Returns the dram bandwidth for display in MBytes/s
8169  */
8170 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8171 {
8172         /* Calculate DRAM Bandwidth and the part allocated to display. */
8173         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8174         fixed20_12 yclk, dram_channels, bandwidth;
8175         fixed20_12 a;
8176
8177         a.full = dfixed_const(1000);
8178         yclk.full = dfixed_const(wm->yclk);
8179         yclk.full = dfixed_div(yclk, a);
8180         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8181         a.full = dfixed_const(10);
8182         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8183         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8184         bandwidth.full = dfixed_mul(dram_channels, yclk);
8185         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8186
8187         return dfixed_trunc(bandwidth);
8188 }
8189
8190 /**
8191  * dce8_data_return_bandwidth - get the data return bandwidth
8192  *
8193  * @wm: watermark calculation data
8194  *
8195  * Calculate the data return bandwidth used for display (CIK).
8196  * Used for display watermark bandwidth calculations
8197  * Returns the data return bandwidth in MBytes/s
8198  */
8199 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8200 {
8201         /* Calculate the display Data return Bandwidth */
8202         fixed20_12 return_efficiency; /* 0.8 */
8203         fixed20_12 sclk, bandwidth;
8204         fixed20_12 a;
8205
8206         a.full = dfixed_const(1000);
8207         sclk.full = dfixed_const(wm->sclk);
8208         sclk.full = dfixed_div(sclk, a);
8209         a.full = dfixed_const(10);
8210         return_efficiency.full = dfixed_const(8);
8211         return_efficiency.full = dfixed_div(return_efficiency, a);
8212         a.full = dfixed_const(32);
8213         bandwidth.full = dfixed_mul(a, sclk);
8214         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8215
8216         return dfixed_trunc(bandwidth);
8217 }
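
/* Worked example (illustrative numbers only): with wm->sclk = 800000 (kHz)
 * the math above gives 32 * (800000 / 1000) * 0.8 = 20480 MBytes/s of
 * display data return bandwidth.
 */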
8218
8219 /**
8220  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8221  *
8222  * @wm: watermark calculation data
8223  *
8224  * Calculate the dmif bandwidth used for display (CIK).
8225  * Used for display watermark bandwidth calculations
8226  * Returns the dmif bandwidth in MBytes/s
8227  */
8228 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8229 {
8230         /* Calculate the DMIF Request Bandwidth */
8231         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8232         fixed20_12 disp_clk, bandwidth;
8233         fixed20_12 a, b;
8234
8235         a.full = dfixed_const(1000);
8236         disp_clk.full = dfixed_const(wm->disp_clk);
8237         disp_clk.full = dfixed_div(disp_clk, a);
8238         a.full = dfixed_const(32);
8239         b.full = dfixed_mul(a, disp_clk);
8240
8241         a.full = dfixed_const(10);
8242         disp_clk_request_efficiency.full = dfixed_const(8);
8243         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8244
8245         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8246
8247         return dfixed_trunc(bandwidth);
8248 }
8249
8250 /**
8251  * dce8_available_bandwidth - get the min available bandwidth
8252  *
8253  * @wm: watermark calculation data
8254  *
8255  * Calculate the min available bandwidth used for display (CIK).
8256  * Used for display watermark bandwidth calculations
8257  * Returns the min available bandwidth in MBytes/s
8258  */
8259 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8260 {
8261         /* Calculate the available bandwidth. Display can use this temporarily but not on average. */
8262         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8263         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8264         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8265
8266         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8267 }
8268
8269 /**
8270  * dce8_average_bandwidth - get the average available bandwidth
8271  *
8272  * @wm: watermark calculation data
8273  *
8274  * Calculate the average available bandwidth used for display (CIK).
8275  * Used for display watermark bandwidth calculations
8276  * Returns the average available bandwidth in MBytes/s
8277  */
8278 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8279 {
8280         /* Calculate the display mode Average Bandwidth
8281          * DisplayMode should contain the source and destination dimensions,
8282          * timing, etc.
8283          */
8284         fixed20_12 bpp;
8285         fixed20_12 line_time;
8286         fixed20_12 src_width;
8287         fixed20_12 bandwidth;
8288         fixed20_12 a;
8289
8290         a.full = dfixed_const(1000);
8291         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8292         line_time.full = dfixed_div(line_time, a);
8293         bpp.full = dfixed_const(wm->bytes_per_pixel);
8294         src_width.full = dfixed_const(wm->src_width);
8295         bandwidth.full = dfixed_mul(src_width, bpp);
8296         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8297         bandwidth.full = dfixed_div(bandwidth, line_time);
8298
8299         return dfixed_trunc(bandwidth);
8300 }
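
/* Worked example (illustrative numbers only): for src_width = 1920,
 * bytes_per_pixel = 4, vsc = 1.0 and active_time + blank_time = 16000 ns,
 * the average bandwidth is (1920 * 4 * 1.0) / (16000 / 1000) = 480 MBytes/s.
 */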
8301
8302 /**
8303  * dce8_latency_watermark - get the latency watermark
8304  *
8305  * @wm: watermark calculation data
8306  *
8307  * Calculate the latency watermark (CIK).
8308  * Used for display watermark bandwidth calculations
8309  * Returns the latency watermark in ns
8310  */
8311 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8312 {
8313         /* First calculate the latency in ns */
8314         u32 mc_latency = 2000; /* 2000 ns. */
8315         u32 available_bandwidth = dce8_available_bandwidth(wm);
8316         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8317         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8318         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8319         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8320                 (wm->num_heads * cursor_line_pair_return_time);
8321         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8322         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8323         u32 tmp, dmif_size = 12288;
8324         fixed20_12 a, b, c;
8325
8326         if (wm->num_heads == 0)
8327                 return 0;
8328
8329         a.full = dfixed_const(2);
8330         b.full = dfixed_const(1);
8331         if ((wm->vsc.full > a.full) ||
8332             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8333             (wm->vtaps >= 5) ||
8334             ((wm->vsc.full >= a.full) && wm->interlaced))
8335                 max_src_lines_per_dst_line = 4;
8336         else
8337                 max_src_lines_per_dst_line = 2;
8338
8339         a.full = dfixed_const(available_bandwidth);
8340         b.full = dfixed_const(wm->num_heads);
8341         a.full = dfixed_div(a, b);
8342
8343         b.full = dfixed_const(mc_latency + 512);
8344         c.full = dfixed_const(wm->disp_clk);
8345         b.full = dfixed_div(b, c);
8346
8347         c.full = dfixed_const(dmif_size);
8348         b.full = dfixed_div(c, b);
8349
8350         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8351
8352         b.full = dfixed_const(1000);
8353         c.full = dfixed_const(wm->disp_clk);
8354         b.full = dfixed_div(c, b);
8355         c.full = dfixed_const(wm->bytes_per_pixel);
8356         b.full = dfixed_mul(b, c);
8357
8358         lb_fill_bw = min(tmp, dfixed_trunc(b));
8359
8360         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8361         b.full = dfixed_const(1000);
8362         c.full = dfixed_const(lb_fill_bw);
8363         b.full = dfixed_div(c, b);
8364         a.full = dfixed_div(a, b);
8365         line_fill_time = dfixed_trunc(a);
8366
8367         if (line_fill_time < wm->active_time)
8368                 return latency;
8369         else
8370                 return latency + (line_fill_time - wm->active_time);
8371
8372 }
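
/* Rough feel for the numbers above (illustrative only): with
 * available_bandwidth = 5600 MBytes/s, worst_chunk_return_time is
 * (512 * 8 * 1000) / 5600 ~= 731 ns and cursor_line_pair_return_time is
 * (128 * 4 * 1000) / 5600 ~= 91 ns; these are scaled by the number of
 * heads and added to the 2000 ns MC latency and the dc pipe latency.
 */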
8373
8374 /**
8375  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8376  * average bandwidth against the dram bandwidth for display
8377  *
8378  * @wm: watermark calculation data
8379  *
8380  * Check if the display average bandwidth fits in the display
8381  * dram bandwidth (CIK).
8382  * Used for display watermark bandwidth calculations
8383  * Returns true if the display fits, false if not.
8384  */
8385 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8386 {
8387         if (dce8_average_bandwidth(wm) <=
8388             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8389                 return true;
8390         else
8391                 return false;
8392 }
8393
8394 /**
8395  * dce8_average_bandwidth_vs_available_bandwidth - check
8396  * average and available bandwidth
8397  *
8398  * @wm: watermark calculation data
8399  *
8400  * Check if the display average bandwidth fits in the display
8401  * available bandwidth (CIK).
8402  * Used for display watermark bandwidth calculations
8403  * Returns true if the display fits, false if not.
8404  */
8405 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8406 {
8407         if (dce8_average_bandwidth(wm) <=
8408             (dce8_available_bandwidth(wm) / wm->num_heads))
8409                 return true;
8410         else
8411                 return false;
8412 }
8413
8414 /**
8415  * dce8_check_latency_hiding - check latency hiding
8416  *
8417  * @wm: watermark calculation data
8418  *
8419  * Check latency hiding (CIK).
8420  * Used for display watermark bandwidth calculations
8421  * Returns true if the display fits, false if not.
8422  */
8423 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8424 {
8425         u32 lb_partitions = wm->lb_size / wm->src_width;
8426         u32 line_time = wm->active_time + wm->blank_time;
8427         u32 latency_tolerant_lines;
8428         u32 latency_hiding;
8429         fixed20_12 a;
8430
8431         a.full = dfixed_const(1);
8432         if (wm->vsc.full > a.full)
8433                 latency_tolerant_lines = 1;
8434         else {
8435                 if (lb_partitions <= (wm->vtaps + 1))
8436                         latency_tolerant_lines = 1;
8437                 else
8438                         latency_tolerant_lines = 2;
8439         }
8440
8441         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8442
8443         if (dce8_latency_watermark(wm) <= latency_hiding)
8444                 return true;
8445         else
8446                 return false;
8447 }
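
/* Illustrative example: with lb_size = 5120, src_width = 1920, vsc = 1.0
 * and vtaps = 1, lb_partitions = 5120 / 1920 = 2, which is <= vtaps + 1,
 * so latency_tolerant_lines = 1 and latency_hiding is one full line time
 * plus the blank time.
 */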
8448
8449 /**
8450  * dce8_program_watermarks - program display watermarks
8451  *
8452  * @rdev: radeon_device pointer
8453  * @radeon_crtc: the selected display controller
8454  * @lb_size: line buffer size
8455  * @num_heads: number of display controllers in use
8456  *
8457  * Calculate and program the display watermarks for the
8458  * selected display controller (CIK).
8459  */
8460 static void dce8_program_watermarks(struct radeon_device *rdev,
8461                                     struct radeon_crtc *radeon_crtc,
8462                                     u32 lb_size, u32 num_heads)
8463 {
8464         struct drm_display_mode *mode = &radeon_crtc->base.mode;
8465         struct dce8_wm_params wm_low, wm_high;
8466         u32 pixel_period;
8467         u32 line_time = 0;
8468         u32 latency_watermark_a = 0, latency_watermark_b = 0;
8469         u32 tmp, wm_mask;
8470
8471         if (radeon_crtc->base.enabled && num_heads && mode) {
8472                 pixel_period = 1000000 / (u32)mode->clock;
8473                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8474
8475                 /* watermark for high clocks */
8476                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8477                     rdev->pm.dpm_enabled) {
8478                         wm_high.yclk =
8479                                 radeon_dpm_get_mclk(rdev, false) * 10;
8480                         wm_high.sclk =
8481                                 radeon_dpm_get_sclk(rdev, false) * 10;
8482                 } else {
8483                         wm_high.yclk = rdev->pm.current_mclk * 10;
8484                         wm_high.sclk = rdev->pm.current_sclk * 10;
8485                 }
8486
8487                 wm_high.disp_clk = mode->clock;
8488                 wm_high.src_width = mode->crtc_hdisplay;
8489                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8490                 wm_high.blank_time = line_time - wm_high.active_time;
8491                 wm_high.interlaced = false;
8492                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8493                         wm_high.interlaced = true;
8494                 wm_high.vsc = radeon_crtc->vsc;
8495                 wm_high.vtaps = 1;
8496                 if (radeon_crtc->rmx_type != RMX_OFF)
8497                         wm_high.vtaps = 2;
8498                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8499                 wm_high.lb_size = lb_size;
8500                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8501                 wm_high.num_heads = num_heads;
8502
8503                 /* set for high clocks */
8504                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8505
8506                 /* possibly force display priority to high */
8507                 /* should really do this at mode validation time... */
8508                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8509                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8510                     !dce8_check_latency_hiding(&wm_high) ||
8511                     (rdev->disp_priority == 2)) {
8512                         DRM_DEBUG_KMS("force priority to high\n");
8513                 }
8514
8515                 /* watermark for low clocks */
8516                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8517                     rdev->pm.dpm_enabled) {
8518                         wm_low.yclk =
8519                                 radeon_dpm_get_mclk(rdev, true) * 10;
8520                         wm_low.sclk =
8521                                 radeon_dpm_get_sclk(rdev, true) * 10;
8522                 } else {
8523                         wm_low.yclk = rdev->pm.current_mclk * 10;
8524                         wm_low.sclk = rdev->pm.current_sclk * 10;
8525                 }
8526
8527                 wm_low.disp_clk = mode->clock;
8528                 wm_low.src_width = mode->crtc_hdisplay;
8529                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8530                 wm_low.blank_time = line_time - wm_low.active_time;
8531                 wm_low.interlaced = false;
8532                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8533                         wm_low.interlaced = true;
8534                 wm_low.vsc = radeon_crtc->vsc;
8535                 wm_low.vtaps = 1;
8536                 if (radeon_crtc->rmx_type != RMX_OFF)
8537                         wm_low.vtaps = 2;
8538                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8539                 wm_low.lb_size = lb_size;
8540                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8541                 wm_low.num_heads = num_heads;
8542
8543                 /* set for low clocks */
8544                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8545
8546                 /* possibly force display priority to high */
8547                 /* should really do this at mode validation time... */
8548                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8549                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8550                     !dce8_check_latency_hiding(&wm_low) ||
8551                     (rdev->disp_priority == 2)) {
8552                         DRM_DEBUG_KMS("force priority to high\n");
8553                 }
8554         }
8555
8556         /* select wm A */
8557         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8558         tmp = wm_mask;
8559         tmp &= ~LATENCY_WATERMARK_MASK(3);
8560         tmp |= LATENCY_WATERMARK_MASK(1);
8561         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8562         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8563                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8564                 LATENCY_HIGH_WATERMARK(line_time)));
8565         /* select wm B */
8566         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8567         tmp &= ~LATENCY_WATERMARK_MASK(3);
8568         tmp |= LATENCY_WATERMARK_MASK(2);
8569         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8570         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8571                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8572                 LATENCY_HIGH_WATERMARK(line_time)));
8573         /* restore original selection */
8574         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8575
8576         /* save values for DPM */
8577         radeon_crtc->line_time = line_time;
8578         radeon_crtc->wm_high = latency_watermark_a;
8579         radeon_crtc->wm_low = latency_watermark_b;
8580 }
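
/* Note: watermark A above is programmed from the high clock parameters and
 * watermark B from the low clock parameters; line_time, wm_high and wm_low
 * are saved on the crtc so that DPM can later consult the matching set when
 * it changes power state.
 */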
8581
8582 /**
8583  * dce8_bandwidth_update - program display watermarks
8584  *
8585  * @rdev: radeon_device pointer
8586  *
8587  * Calculate and program the display watermarks and line
8588  * buffer allocation (CIK).
8589  */
8590 void dce8_bandwidth_update(struct radeon_device *rdev)
8591 {
8592         struct drm_display_mode *mode = NULL;
8593         u32 num_heads = 0, lb_size;
8594         int i;
8595
8596         radeon_update_display_priority(rdev);
8597
8598         for (i = 0; i < rdev->num_crtc; i++) {
8599                 if (rdev->mode_info.crtcs[i]->base.enabled)
8600                         num_heads++;
8601         }
8602         for (i = 0; i < rdev->num_crtc; i++) {
8603                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8604                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8605                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8606         }
8607 }
8608
8609 /**
8610  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8611  *
8612  * @rdev: radeon_device pointer
8613  *
8614  * Fetches a GPU clock counter snapshot (CIK).
8615  * Returns the 64 bit clock counter snapshot.
8616  */
8617 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8618 {
8619         uint64_t clock;
8620
8621         mutex_lock(&rdev->gpu_clock_mutex);
8622         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8623         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8624                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8625         mutex_unlock(&rdev->gpu_clock_mutex);
8626         return clock;
8627 }
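
/* Note: the RLC_CAPTURE_GPU_CLOCK_COUNT write above latches the counter so
 * that the LSB/MSB halves are read as one consistent 64 bit snapshot, and
 * gpu_clock_mutex serializes concurrent captures.
 */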
8628
8629 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8630                               u32 cntl_reg, u32 status_reg)
8631 {
8632         int r, i;
8633         struct atom_clock_dividers dividers;
8634         uint32_t tmp;
8635
8636         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8637                                            clock, false, &dividers);
8638         if (r)
8639                 return r;
8640
8641         tmp = RREG32_SMC(cntl_reg);
8642         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8643         tmp |= dividers.post_divider;
8644         WREG32_SMC(cntl_reg, tmp);
8645
8646         for (i = 0; i < 100; i++) {
8647                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8648                         break;
8649                 mdelay(10);
8650         }
8651         if (i == 100)
8652                 return -ETIMEDOUT;
8653
8654         return 0;
8655 }
8656
8657 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8658 {
8659         int r = 0;
8660
8661         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8662         if (r)
8663                 return r;
8664
8665         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8666         return r;
8667 }
8668
8669 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8670 {
8671         struct pci_dev *root = rdev->pdev->bus->self;
8672         int bridge_pos, gpu_pos;
8673         u32 speed_cntl, mask, current_data_rate;
8674         int ret, i;
8675         u16 tmp16;
8676
8677         if (radeon_pcie_gen2 == 0)
8678                 return;
8679
8680         if (rdev->flags & RADEON_IS_IGP)
8681                 return;
8682
8683         if (!(rdev->flags & RADEON_IS_PCIE))
8684                 return;
8685
8686         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8687         if (ret != 0)
8688                 return;
8689
8690         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8691                 return;
8692
8693         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8694         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8695                 LC_CURRENT_DATA_RATE_SHIFT;
8696         if (mask & DRM_PCIE_SPEED_80) {
8697                 if (current_data_rate == 2) {
8698                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8699                         return;
8700                 }
8701                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8702         } else if (mask & DRM_PCIE_SPEED_50) {
8703                 if (current_data_rate == 1) {
8704                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8705                         return;
8706                 }
8707                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8708         }
8709
8710         bridge_pos = pci_pcie_cap(root);
8711         if (!bridge_pos)
8712                 return;
8713
8714         gpu_pos = pci_pcie_cap(rdev->pdev);
8715         if (!gpu_pos)
8716                 return;
8717
8718         if (mask & DRM_PCIE_SPEED_80) {
8719                 /* re-try equalization if gen3 is not already enabled */
8720                 if (current_data_rate != 2) {
8721                         u16 bridge_cfg, gpu_cfg;
8722                         u16 bridge_cfg2, gpu_cfg2;
8723                         u32 max_lw, current_lw, tmp;
8724
8725                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8726                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8727
8728                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8729                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8730
8731                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8732                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8733
8734                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8735                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8736                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8737
8738                         if (current_lw < max_lw) {
8739                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8740                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8741                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8742                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8743                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8744                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8745                                 }
8746                         }
8747
8748                         for (i = 0; i < 10; i++) {
8749                                 /* check status */
8750                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8751                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8752                                         break;
8753
8754                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8755                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8756
8757                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8758                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8759
8760                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8761                                 tmp |= LC_SET_QUIESCE;
8762                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8763
8764                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8765                                 tmp |= LC_REDO_EQ;
8766                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8767
8768                                 mdelay(100);
8769
8770                                 /* linkctl */
8771                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8772                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8773                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8774                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8775
8776                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8777                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8778                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8779                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8780
8781                                 /* linkctl2 */
8782                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8783                                 tmp16 &= ~((1 << 4) | (7 << 9));
8784                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8785                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8786
8787                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8788                                 tmp16 &= ~((1 << 4) | (7 << 9));
8789                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8790                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8791
8792                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8793                                 tmp &= ~LC_SET_QUIESCE;
8794                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8795                         }
8796                 }
8797         }
8798
8799         /* set the link speed */
8800         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8801         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8802         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8803
8804         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8805         tmp16 &= ~0xf;
8806         if (mask & DRM_PCIE_SPEED_80)
8807                 tmp16 |= 3; /* gen3 */
8808         else if (mask & DRM_PCIE_SPEED_50)
8809                 tmp16 |= 2; /* gen2 */
8810         else
8811                 tmp16 |= 1; /* gen1 */
8812         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8813
8814         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8815         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8816         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8817
8818         for (i = 0; i < rdev->usec_timeout; i++) {
8819                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8820                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8821                         break;
8822                 udelay(1);
8823         }
8824 }
8825
8826 static void cik_program_aspm(struct radeon_device *rdev)
8827 {
8828         u32 data, orig;
8829         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8830         bool disable_clkreq = false;
8831
8832         if (radeon_aspm == 0)
8833                 return;
8834
8835         /* XXX double check IGPs */
8836         if (rdev->flags & RADEON_IS_IGP)
8837                 return;
8838
8839         if (!(rdev->flags & RADEON_IS_PCIE))
8840                 return;
8841
8842         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8843         data &= ~LC_XMIT_N_FTS_MASK;
8844         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8845         if (orig != data)
8846                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8847
8848         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8849         data |= LC_GO_TO_RECOVERY;
8850         if (orig != data)
8851                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8852
8853         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8854         data |= P_IGNORE_EDB_ERR;
8855         if (orig != data)
8856                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8857
8858         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8859         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8860         data |= LC_PMI_TO_L1_DIS;
8861         if (!disable_l0s)
8862                 data |= LC_L0S_INACTIVITY(7);
8863
8864         if (!disable_l1) {
8865                 data |= LC_L1_INACTIVITY(7);
8866                 data &= ~LC_PMI_TO_L1_DIS;
8867                 if (orig != data)
8868                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8869
8870                 if (!disable_plloff_in_l1) {
8871                         bool clk_req_support;
8872
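                             /* allow the PCIe PHY PLLs to power down while the link sits in L1 */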
8873                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8874                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8875                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8876                         if (orig != data)
8877                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8878
8879                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8880                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8881                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8882                         if (orig != data)
8883                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8884
8885                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8886                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8887                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8888                         if (orig != data)
8889                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8890
8891                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8892                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8893                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8894                         if (orig != data)
8895                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8896
8897                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8898                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8899                         data |= LC_DYN_LANES_PWR_STATE(3);
8900                         if (orig != data)
8901                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8902
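                             /* only allow the deeper clock savings if the upstream bridge advertises CLKREQ# clock power management */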
8903                         if (!disable_clkreq) {
8904                                 struct pci_dev *root = rdev->pdev->bus->self;
8905                                 u32 lnkcap;
8906
8907                                 clk_req_support = false;
8908                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8909                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8910                                         clk_req_support = true;
8911                         } else {
8912                                 clk_req_support = false;
8913                         }
8914
8915                         if (clk_req_support) {
8916                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8917                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8918                                 if (orig != data)
8919                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8920
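                                     /* move internal clocks off the PCIe reference clock, presumably so the refclk can be released under CLKREQ# clock PM */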
8921                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
8922                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8923                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8924                                 if (orig != data)
8925                                         WREG32_SMC(THM_CLK_CNTL, data);
8926
8927                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8928                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8929                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8930                                 if (orig != data)
8931                                         WREG32_SMC(MISC_CLK_CTRL, data);
8932
8933                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8934                                 data &= ~BCLK_AS_XCLK;
8935                                 if (orig != data)
8936                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
8937
8938                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8939                                 data &= ~FORCE_BIF_REFCLK_EN;
8940                                 if (orig != data)
8941                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8942
8943                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8944                                 data &= ~MPLL_CLKOUT_SEL_MASK;
8945                                 data |= MPLL_CLKOUT_SEL(4);
8946                                 if (orig != data)
8947                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8948                         }
8949                 }
8950         } else {
8951                 if (orig != data)
8952                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8953         }
8954
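             /* enable light sleep for the PCIe slave/master/replay memories */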
8955         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8956         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8957         if (orig != data)
8958                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8959
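             /* if the link partner reports the maximum N_FTS and the link is lane-reversed in both directions, clear the L0s inactivity timer again (leaving L0s off) */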
8960         if (!disable_l0s) {
8961                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8962                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8963                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8964                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8965                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8966                                 data &= ~LC_L0S_INACTIVITY_MASK;
8967                                 if (orig != data)
8968                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8969                         }
8970                 }
8971         }
8972 }