drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36
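/*
 * Firmware images: both the legacy (uppercase) and the newer (lowercase)
 * firmware naming schemes are declared so that either set of images can
 * be loaded.
 */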
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55
56 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65
66 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67 MODULE_FIRMWARE("radeon/hawaii_me.bin");
68 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74
75 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81
82 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83 MODULE_FIRMWARE("radeon/kaveri_me.bin");
84 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89
90 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91 MODULE_FIRMWARE("radeon/KABINI_me.bin");
92 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96
97 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98 MODULE_FIRMWARE("radeon/kabini_me.bin");
99 MODULE_FIRMWARE("radeon/kabini_ce.bin");
100 MODULE_FIRMWARE("radeon/kabini_mec.bin");
101 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103
104 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110
111 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112 MODULE_FIRMWARE("radeon/mullins_me.bin");
113 MODULE_FIRMWARE("radeon/mullins_ce.bin");
114 MODULE_FIRMWARE("radeon/mullins_mec.bin");
115 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117
118 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119 extern void r600_ih_ring_fini(struct radeon_device *rdev);
120 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
123 extern void sumo_rlc_fini(struct radeon_device *rdev);
124 extern int sumo_rlc_init(struct radeon_device *rdev);
125 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
126 extern void si_rlc_reset(struct radeon_device *rdev);
127 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
128 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 extern int cik_sdma_resume(struct radeon_device *rdev);
130 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
131 extern void cik_sdma_fini(struct radeon_device *rdev);
132 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141                                           bool enable);
142
/**
 * ci_get_temp - get GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Read the current temperature from the on-die thermal sensor (CI asics).
 * Returns the temperature in millidegrees C.
 */
144 int ci_get_temp(struct radeon_device *rdev)
145 {
146         u32 temp;
147         int actual_temp = 0;
148
149         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
150                 CTF_TEMP_SHIFT;
151
152         if (temp & 0x200)
153                 actual_temp = 255;
154         else
155                 actual_temp = temp & 0x1ff;
156
157         actual_temp = actual_temp * 1000;
158
159         return actual_temp;
160 }
161
/**
 * kv_get_temp - get GPU temperature
 *
 * @rdev: radeon_device pointer
 *
 * Read the current temperature from the on-die thermal sensor (KV/KB asics).
 * Returns the temperature in millidegrees C.
 */
163 int kv_get_temp(struct radeon_device *rdev)
164 {
165         u32 temp;
166         int actual_temp = 0;
167
168         temp = RREG32_SMC(0xC0300E0C);
169
170         if (temp)
171                 actual_temp = (temp / 8) - 49;
172         else
173                 actual_temp = 0;
174
175         actual_temp = actual_temp * 1000;
176
177         return actual_temp;
178 }
179
/*
 * Indirect register accessors
 *
 * PCIE port registers are accessed indirectly through the
 * PCIE_INDEX/PCIE_DATA register pair, serialized by the pciep_idx_lock
 * spinlock.
 */
183 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
184 {
185         unsigned long flags;
186         u32 r;
187
188         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
189         WREG32(PCIE_INDEX, reg);
190         (void)RREG32(PCIE_INDEX);
191         r = RREG32(PCIE_DATA);
192         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
193         return r;
194 }
195
196 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
201         WREG32(PCIE_INDEX, reg);
202         (void)RREG32(PCIE_INDEX);
203         WREG32(PCIE_DATA, v);
204         (void)RREG32(PCIE_DATA);
205         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
206 }
207
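/*
 * Per-ASIC register lists used to build the RLC save/restore buffer.
 */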
208 static const u32 spectre_rlc_save_restore_register_list[] =
209 {
210         (0x0e00 << 16) | (0xc12c >> 2),
211         0x00000000,
212         (0x0e00 << 16) | (0xc140 >> 2),
213         0x00000000,
214         (0x0e00 << 16) | (0xc150 >> 2),
215         0x00000000,
216         (0x0e00 << 16) | (0xc15c >> 2),
217         0x00000000,
218         (0x0e00 << 16) | (0xc168 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc170 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc178 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc204 >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc2b4 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc2b8 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc2bc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc2c0 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0x8228 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0x829c >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0x869c >> 2),
239         0x00000000,
240         (0x0600 << 16) | (0x98f4 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0x98f8 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0x9900 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc260 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0x90e8 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0x3c000 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0x3c00c >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0x8c1c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0x9700 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xcd20 >> 2),
259         0x00000000,
260         (0x4e00 << 16) | (0xcd20 >> 2),
261         0x00000000,
262         (0x5e00 << 16) | (0xcd20 >> 2),
263         0x00000000,
264         (0x6e00 << 16) | (0xcd20 >> 2),
265         0x00000000,
266         (0x7e00 << 16) | (0xcd20 >> 2),
267         0x00000000,
268         (0x8e00 << 16) | (0xcd20 >> 2),
269         0x00000000,
270         (0x9e00 << 16) | (0xcd20 >> 2),
271         0x00000000,
272         (0xae00 << 16) | (0xcd20 >> 2),
273         0x00000000,
274         (0xbe00 << 16) | (0xcd20 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x89bc >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x8900 >> 2),
279         0x00000000,
280         0x3,
281         (0x0e00 << 16) | (0xc130 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc134 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc1fc >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc208 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc264 >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc268 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0xc26c >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0xc270 >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0xc274 >> 2),
298         0x00000000,
299         (0x0e00 << 16) | (0xc278 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0xc27c >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0xc280 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc284 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0xc288 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0xc28c >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0xc290 >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0xc294 >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0xc298 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xc29c >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xc2a0 >> 2),
320         0x00000000,
321         (0x0e00 << 16) | (0xc2a4 >> 2),
322         0x00000000,
323         (0x0e00 << 16) | (0xc2a8 >> 2),
324         0x00000000,
325         (0x0e00 << 16) | (0xc2ac  >> 2),
326         0x00000000,
327         (0x0e00 << 16) | (0xc2b0 >> 2),
328         0x00000000,
329         (0x0e00 << 16) | (0x301d0 >> 2),
330         0x00000000,
331         (0x0e00 << 16) | (0x30238 >> 2),
332         0x00000000,
333         (0x0e00 << 16) | (0x30250 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x30254 >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x30258 >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0x3025c >> 2),
340         0x00000000,
341         (0x4e00 << 16) | (0xc900 >> 2),
342         0x00000000,
343         (0x5e00 << 16) | (0xc900 >> 2),
344         0x00000000,
345         (0x6e00 << 16) | (0xc900 >> 2),
346         0x00000000,
347         (0x7e00 << 16) | (0xc900 >> 2),
348         0x00000000,
349         (0x8e00 << 16) | (0xc900 >> 2),
350         0x00000000,
351         (0x9e00 << 16) | (0xc900 >> 2),
352         0x00000000,
353         (0xae00 << 16) | (0xc900 >> 2),
354         0x00000000,
355         (0xbe00 << 16) | (0xc900 >> 2),
356         0x00000000,
357         (0x4e00 << 16) | (0xc904 >> 2),
358         0x00000000,
359         (0x5e00 << 16) | (0xc904 >> 2),
360         0x00000000,
361         (0x6e00 << 16) | (0xc904 >> 2),
362         0x00000000,
363         (0x7e00 << 16) | (0xc904 >> 2),
364         0x00000000,
365         (0x8e00 << 16) | (0xc904 >> 2),
366         0x00000000,
367         (0x9e00 << 16) | (0xc904 >> 2),
368         0x00000000,
369         (0xae00 << 16) | (0xc904 >> 2),
370         0x00000000,
371         (0xbe00 << 16) | (0xc904 >> 2),
372         0x00000000,
373         (0x4e00 << 16) | (0xc908 >> 2),
374         0x00000000,
375         (0x5e00 << 16) | (0xc908 >> 2),
376         0x00000000,
377         (0x6e00 << 16) | (0xc908 >> 2),
378         0x00000000,
379         (0x7e00 << 16) | (0xc908 >> 2),
380         0x00000000,
381         (0x8e00 << 16) | (0xc908 >> 2),
382         0x00000000,
383         (0x9e00 << 16) | (0xc908 >> 2),
384         0x00000000,
385         (0xae00 << 16) | (0xc908 >> 2),
386         0x00000000,
387         (0xbe00 << 16) | (0xc908 >> 2),
388         0x00000000,
389         (0x4e00 << 16) | (0xc90c >> 2),
390         0x00000000,
391         (0x5e00 << 16) | (0xc90c >> 2),
392         0x00000000,
393         (0x6e00 << 16) | (0xc90c >> 2),
394         0x00000000,
395         (0x7e00 << 16) | (0xc90c >> 2),
396         0x00000000,
397         (0x8e00 << 16) | (0xc90c >> 2),
398         0x00000000,
399         (0x9e00 << 16) | (0xc90c >> 2),
400         0x00000000,
401         (0xae00 << 16) | (0xc90c >> 2),
402         0x00000000,
403         (0xbe00 << 16) | (0xc90c >> 2),
404         0x00000000,
405         (0x4e00 << 16) | (0xc910 >> 2),
406         0x00000000,
407         (0x5e00 << 16) | (0xc910 >> 2),
408         0x00000000,
409         (0x6e00 << 16) | (0xc910 >> 2),
410         0x00000000,
411         (0x7e00 << 16) | (0xc910 >> 2),
412         0x00000000,
413         (0x8e00 << 16) | (0xc910 >> 2),
414         0x00000000,
415         (0x9e00 << 16) | (0xc910 >> 2),
416         0x00000000,
417         (0xae00 << 16) | (0xc910 >> 2),
418         0x00000000,
419         (0xbe00 << 16) | (0xc910 >> 2),
420         0x00000000,
421         (0x0e00 << 16) | (0xc99c >> 2),
422         0x00000000,
423         (0x0e00 << 16) | (0x9834 >> 2),
424         0x00000000,
425         (0x0000 << 16) | (0x30f00 >> 2),
426         0x00000000,
427         (0x0001 << 16) | (0x30f00 >> 2),
428         0x00000000,
429         (0x0000 << 16) | (0x30f04 >> 2),
430         0x00000000,
431         (0x0001 << 16) | (0x30f04 >> 2),
432         0x00000000,
433         (0x0000 << 16) | (0x30f08 >> 2),
434         0x00000000,
435         (0x0001 << 16) | (0x30f08 >> 2),
436         0x00000000,
437         (0x0000 << 16) | (0x30f0c >> 2),
438         0x00000000,
439         (0x0001 << 16) | (0x30f0c >> 2),
440         0x00000000,
441         (0x0600 << 16) | (0x9b7c >> 2),
442         0x00000000,
443         (0x0e00 << 16) | (0x8a14 >> 2),
444         0x00000000,
445         (0x0e00 << 16) | (0x8a18 >> 2),
446         0x00000000,
447         (0x0600 << 16) | (0x30a00 >> 2),
448         0x00000000,
449         (0x0e00 << 16) | (0x8bf0 >> 2),
450         0x00000000,
451         (0x0e00 << 16) | (0x8bcc >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x8b24 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x30a04 >> 2),
456         0x00000000,
457         (0x0600 << 16) | (0x30a10 >> 2),
458         0x00000000,
459         (0x0600 << 16) | (0x30a14 >> 2),
460         0x00000000,
461         (0x0600 << 16) | (0x30a18 >> 2),
462         0x00000000,
463         (0x0600 << 16) | (0x30a2c >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0xc700 >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0xc704 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0xc708 >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0xc768 >> 2),
472         0x00000000,
473         (0x0400 << 16) | (0xc770 >> 2),
474         0x00000000,
475         (0x0400 << 16) | (0xc774 >> 2),
476         0x00000000,
477         (0x0400 << 16) | (0xc778 >> 2),
478         0x00000000,
479         (0x0400 << 16) | (0xc77c >> 2),
480         0x00000000,
481         (0x0400 << 16) | (0xc780 >> 2),
482         0x00000000,
483         (0x0400 << 16) | (0xc784 >> 2),
484         0x00000000,
485         (0x0400 << 16) | (0xc788 >> 2),
486         0x00000000,
487         (0x0400 << 16) | (0xc78c >> 2),
488         0x00000000,
489         (0x0400 << 16) | (0xc798 >> 2),
490         0x00000000,
491         (0x0400 << 16) | (0xc79c >> 2),
492         0x00000000,
493         (0x0400 << 16) | (0xc7a0 >> 2),
494         0x00000000,
495         (0x0400 << 16) | (0xc7a4 >> 2),
496         0x00000000,
497         (0x0400 << 16) | (0xc7a8 >> 2),
498         0x00000000,
499         (0x0400 << 16) | (0xc7ac >> 2),
500         0x00000000,
501         (0x0400 << 16) | (0xc7b0 >> 2),
502         0x00000000,
503         (0x0400 << 16) | (0xc7b4 >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0x9100 >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0x3c010 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0x92a8 >> 2),
510         0x00000000,
511         (0x0e00 << 16) | (0x92ac >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0x92b4 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0x92b8 >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0x92bc >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0x92c0 >> 2),
520         0x00000000,
521         (0x0e00 << 16) | (0x92c4 >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0x92c8 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0x92cc >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x92d0 >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x8c00 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0x8c04 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0x8c20 >> 2),
534         0x00000000,
535         (0x0e00 << 16) | (0x8c38 >> 2),
536         0x00000000,
537         (0x0e00 << 16) | (0x8c3c >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0xae00 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x9604 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0xac08 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0xac0c >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0xac10 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0xac14 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0xac58 >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0xac68 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0xac6c >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0xac70 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0xac74 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0xac78 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0xac7c >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0xac80 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0xac84 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0xac88 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0xac8c >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x970c >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x9714 >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x9718 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x971c >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x31068 >> 2),
582         0x00000000,
583         (0x4e00 << 16) | (0x31068 >> 2),
584         0x00000000,
585         (0x5e00 << 16) | (0x31068 >> 2),
586         0x00000000,
587         (0x6e00 << 16) | (0x31068 >> 2),
588         0x00000000,
589         (0x7e00 << 16) | (0x31068 >> 2),
590         0x00000000,
591         (0x8e00 << 16) | (0x31068 >> 2),
592         0x00000000,
593         (0x9e00 << 16) | (0x31068 >> 2),
594         0x00000000,
595         (0xae00 << 16) | (0x31068 >> 2),
596         0x00000000,
597         (0xbe00 << 16) | (0x31068 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xcd10 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xcd14 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0x88b0 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x88b4 >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x88b8 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0x88bc >> 2),
610         0x00000000,
611         (0x0400 << 16) | (0x89c0 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x88c4 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0x88c8 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0x88d0 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0x88d4 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0x88d8 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0x8980 >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0x30938 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0x3093c >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0x30940 >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0x89a0 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0x30900 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0x30904 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x89b4 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x3c210 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x3c214 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x3c218 >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x8904 >> 2),
646         0x00000000,
647         0x5,
648         (0x0e00 << 16) | (0x8c28 >> 2),
649         (0x0e00 << 16) | (0x8c2c >> 2),
650         (0x0e00 << 16) | (0x8c30 >> 2),
651         (0x0e00 << 16) | (0x8c34 >> 2),
652         (0x0e00 << 16) | (0x9600 >> 2),
653 };
654
655 static const u32 kalindi_rlc_save_restore_register_list[] =
656 {
657         (0x0e00 << 16) | (0xc12c >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc140 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc150 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc15c >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc168 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc170 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc204 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc2b4 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc2b8 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc2bc >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc2c0 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x8228 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0x829c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x869c >> 2),
684         0x00000000,
685         (0x0600 << 16) | (0x98f4 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x98f8 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x9900 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc260 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x90e8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x3c000 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x3c00c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x8c1c >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x9700 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xcd20 >> 2),
704         0x00000000,
705         (0x4e00 << 16) | (0xcd20 >> 2),
706         0x00000000,
707         (0x5e00 << 16) | (0xcd20 >> 2),
708         0x00000000,
709         (0x6e00 << 16) | (0xcd20 >> 2),
710         0x00000000,
711         (0x7e00 << 16) | (0xcd20 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x89bc >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x8900 >> 2),
716         0x00000000,
717         0x3,
718         (0x0e00 << 16) | (0xc130 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc134 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc1fc >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc208 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc264 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc268 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc26c >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc270 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc274 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc28c >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc290 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc294 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0xc298 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0xc2a0 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0xc2a4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0xc2a8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc2ac >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x301d0 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x30238 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x30250 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x30254 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x30258 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x3025c >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xc900 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xc900 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xc900 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xc900 >> 2),
771         0x00000000,
772         (0x4e00 << 16) | (0xc904 >> 2),
773         0x00000000,
774         (0x5e00 << 16) | (0xc904 >> 2),
775         0x00000000,
776         (0x6e00 << 16) | (0xc904 >> 2),
777         0x00000000,
778         (0x7e00 << 16) | (0xc904 >> 2),
779         0x00000000,
780         (0x4e00 << 16) | (0xc908 >> 2),
781         0x00000000,
782         (0x5e00 << 16) | (0xc908 >> 2),
783         0x00000000,
784         (0x6e00 << 16) | (0xc908 >> 2),
785         0x00000000,
786         (0x7e00 << 16) | (0xc908 >> 2),
787         0x00000000,
788         (0x4e00 << 16) | (0xc90c >> 2),
789         0x00000000,
790         (0x5e00 << 16) | (0xc90c >> 2),
791         0x00000000,
792         (0x6e00 << 16) | (0xc90c >> 2),
793         0x00000000,
794         (0x7e00 << 16) | (0xc90c >> 2),
795         0x00000000,
796         (0x4e00 << 16) | (0xc910 >> 2),
797         0x00000000,
798         (0x5e00 << 16) | (0xc910 >> 2),
799         0x00000000,
800         (0x6e00 << 16) | (0xc910 >> 2),
801         0x00000000,
802         (0x7e00 << 16) | (0xc910 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc99c >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x9834 >> 2),
807         0x00000000,
808         (0x0000 << 16) | (0x30f00 >> 2),
809         0x00000000,
810         (0x0000 << 16) | (0x30f04 >> 2),
811         0x00000000,
812         (0x0000 << 16) | (0x30f08 >> 2),
813         0x00000000,
814         (0x0000 << 16) | (0x30f0c >> 2),
815         0x00000000,
816         (0x0600 << 16) | (0x9b7c >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x8a14 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x8a18 >> 2),
821         0x00000000,
822         (0x0600 << 16) | (0x30a00 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x8bf0 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x8bcc >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0x8b24 >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0x30a04 >> 2),
831         0x00000000,
832         (0x0600 << 16) | (0x30a10 >> 2),
833         0x00000000,
834         (0x0600 << 16) | (0x30a14 >> 2),
835         0x00000000,
836         (0x0600 << 16) | (0x30a18 >> 2),
837         0x00000000,
838         (0x0600 << 16) | (0x30a2c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0xc700 >> 2),
841         0x00000000,
842         (0x0e00 << 16) | (0xc704 >> 2),
843         0x00000000,
844         (0x0e00 << 16) | (0xc708 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xc768 >> 2),
847         0x00000000,
848         (0x0400 << 16) | (0xc770 >> 2),
849         0x00000000,
850         (0x0400 << 16) | (0xc774 >> 2),
851         0x00000000,
852         (0x0400 << 16) | (0xc798 >> 2),
853         0x00000000,
854         (0x0400 << 16) | (0xc79c >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x9100 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x3c010 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8c00 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8c04 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x8c20 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x8c38 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x8c3c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0xae00 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x9604 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xac08 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xac0c >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xac10 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xac14 >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0xac58 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0xac68 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0xac6c >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0xac70 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0xac74 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0xac78 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0xac7c >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0xac80 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0xac84 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0xac88 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0xac8c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x970c >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9714 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x9718 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x971c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x31068 >> 2),
913         0x00000000,
914         (0x4e00 << 16) | (0x31068 >> 2),
915         0x00000000,
916         (0x5e00 << 16) | (0x31068 >> 2),
917         0x00000000,
918         (0x6e00 << 16) | (0x31068 >> 2),
919         0x00000000,
920         (0x7e00 << 16) | (0x31068 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xcd10 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xcd14 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x88b0 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x88b4 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x88b8 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x88bc >> 2),
933         0x00000000,
934         (0x0400 << 16) | (0x89c0 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x88c4 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x88c8 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x88d0 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x88d4 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x88d8 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x8980 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0x30938 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0x3093c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0x30940 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0x89a0 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0x30900 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0x30904 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x89b4 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x3e1fc >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x3c210 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x3c214 >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x3c218 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x8904 >> 2),
971         0x00000000,
972         0x5,
973         (0x0e00 << 16) | (0x8c28 >> 2),
974         (0x0e00 << 16) | (0x8c2c >> 2),
975         (0x0e00 << 16) | (0x8c30 >> 2),
976         (0x0e00 << 16) | (0x8c34 >> 2),
977         (0x0e00 << 16) | (0x9600 >> 2),
978 };
979
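/*
 * "Golden" register tables.  Each entry is a {register offset, bitmask,
 * value} triplet consumed by radeon_program_register_sequence().
 */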
980 static const u32 bonaire_golden_spm_registers[] =
981 {
982         0x30800, 0xe0ffffff, 0xe0000000
983 };
984
985 static const u32 bonaire_golden_common_registers[] =
986 {
987         0xc770, 0xffffffff, 0x00000800,
988         0xc774, 0xffffffff, 0x00000800,
989         0xc798, 0xffffffff, 0x00007fbf,
990         0xc79c, 0xffffffff, 0x00007faf
991 };
992
993 static const u32 bonaire_golden_registers[] =
994 {
995         0x3354, 0x00000333, 0x00000333,
996         0x3350, 0x000c0fc0, 0x00040200,
997         0x9a10, 0x00010000, 0x00058208,
998         0x3c000, 0xffff1fff, 0x00140000,
999         0x3c200, 0xfdfc0fff, 0x00000100,
1000         0x3c234, 0x40000000, 0x40000200,
1001         0x9830, 0xffffffff, 0x00000000,
1002         0x9834, 0xf00fffff, 0x00000400,
1003         0x9838, 0x0002021c, 0x00020200,
1004         0xc78, 0x00000080, 0x00000000,
1005         0x5bb0, 0x000000f0, 0x00000070,
1006         0x5bc0, 0xf0311fff, 0x80300000,
1007         0x98f8, 0x73773777, 0x12010001,
1008         0x350c, 0x00810000, 0x408af000,
1009         0x7030, 0x31000111, 0x00000011,
1010         0x2f48, 0x73773777, 0x12010001,
1011         0x220c, 0x00007fb6, 0x0021a1b1,
1012         0x2210, 0x00007fb6, 0x002021b1,
1013         0x2180, 0x00007fb6, 0x00002191,
1014         0x2218, 0x00007fb6, 0x002121b1,
1015         0x221c, 0x00007fb6, 0x002021b1,
1016         0x21dc, 0x00007fb6, 0x00002191,
1017         0x21e0, 0x00007fb6, 0x00002191,
1018         0x3628, 0x0000003f, 0x0000000a,
1019         0x362c, 0x0000003f, 0x0000000a,
1020         0x2ae4, 0x00073ffe, 0x000022a2,
1021         0x240c, 0x000007ff, 0x00000000,
1022         0x8a14, 0xf000003f, 0x00000007,
1023         0x8bf0, 0x00002001, 0x00000001,
1024         0x8b24, 0xffffffff, 0x00ffffff,
1025         0x30a04, 0x0000ff0f, 0x00000000,
1026         0x28a4c, 0x07ffffff, 0x06000000,
1027         0x4d8, 0x00000fff, 0x00000100,
1028         0x3e78, 0x00000001, 0x00000002,
1029         0x9100, 0x03000000, 0x0362c688,
1030         0x8c00, 0x000000ff, 0x00000001,
1031         0xe40, 0x00001fff, 0x00001fff,
1032         0x9060, 0x0000007f, 0x00000020,
1033         0x9508, 0x00010000, 0x00010000,
1034         0xac14, 0x000003ff, 0x000000f3,
1035         0xac0c, 0xffffffff, 0x00001032
1036 };
1037
1038 static const u32 bonaire_mgcg_cgcg_init[] =
1039 {
1040         0xc420, 0xffffffff, 0xfffffffc,
1041         0x30800, 0xffffffff, 0xe0000000,
1042         0x3c2a0, 0xffffffff, 0x00000100,
1043         0x3c208, 0xffffffff, 0x00000100,
1044         0x3c2c0, 0xffffffff, 0xc0000100,
1045         0x3c2c8, 0xffffffff, 0xc0000100,
1046         0x3c2c4, 0xffffffff, 0xc0000100,
1047         0x55e4, 0xffffffff, 0x00600100,
1048         0x3c280, 0xffffffff, 0x00000100,
1049         0x3c214, 0xffffffff, 0x06000100,
1050         0x3c220, 0xffffffff, 0x00000100,
1051         0x3c218, 0xffffffff, 0x06000100,
1052         0x3c204, 0xffffffff, 0x00000100,
1053         0x3c2e0, 0xffffffff, 0x00000100,
1054         0x3c224, 0xffffffff, 0x00000100,
1055         0x3c200, 0xffffffff, 0x00000100,
1056         0x3c230, 0xffffffff, 0x00000100,
1057         0x3c234, 0xffffffff, 0x00000100,
1058         0x3c250, 0xffffffff, 0x00000100,
1059         0x3c254, 0xffffffff, 0x00000100,
1060         0x3c258, 0xffffffff, 0x00000100,
1061         0x3c25c, 0xffffffff, 0x00000100,
1062         0x3c260, 0xffffffff, 0x00000100,
1063         0x3c27c, 0xffffffff, 0x00000100,
1064         0x3c278, 0xffffffff, 0x00000100,
1065         0x3c210, 0xffffffff, 0x06000100,
1066         0x3c290, 0xffffffff, 0x00000100,
1067         0x3c274, 0xffffffff, 0x00000100,
1068         0x3c2b4, 0xffffffff, 0x00000100,
1069         0x3c2b0, 0xffffffff, 0x00000100,
1070         0x3c270, 0xffffffff, 0x00000100,
1071         0x30800, 0xffffffff, 0xe0000000,
1072         0x3c020, 0xffffffff, 0x00010000,
1073         0x3c024, 0xffffffff, 0x00030002,
1074         0x3c028, 0xffffffff, 0x00040007,
1075         0x3c02c, 0xffffffff, 0x00060005,
1076         0x3c030, 0xffffffff, 0x00090008,
1077         0x3c034, 0xffffffff, 0x00010000,
1078         0x3c038, 0xffffffff, 0x00030002,
1079         0x3c03c, 0xffffffff, 0x00040007,
1080         0x3c040, 0xffffffff, 0x00060005,
1081         0x3c044, 0xffffffff, 0x00090008,
1082         0x3c048, 0xffffffff, 0x00010000,
1083         0x3c04c, 0xffffffff, 0x00030002,
1084         0x3c050, 0xffffffff, 0x00040007,
1085         0x3c054, 0xffffffff, 0x00060005,
1086         0x3c058, 0xffffffff, 0x00090008,
1087         0x3c05c, 0xffffffff, 0x00010000,
1088         0x3c060, 0xffffffff, 0x00030002,
1089         0x3c064, 0xffffffff, 0x00040007,
1090         0x3c068, 0xffffffff, 0x00060005,
1091         0x3c06c, 0xffffffff, 0x00090008,
1092         0x3c070, 0xffffffff, 0x00010000,
1093         0x3c074, 0xffffffff, 0x00030002,
1094         0x3c078, 0xffffffff, 0x00040007,
1095         0x3c07c, 0xffffffff, 0x00060005,
1096         0x3c080, 0xffffffff, 0x00090008,
1097         0x3c084, 0xffffffff, 0x00010000,
1098         0x3c088, 0xffffffff, 0x00030002,
1099         0x3c08c, 0xffffffff, 0x00040007,
1100         0x3c090, 0xffffffff, 0x00060005,
1101         0x3c094, 0xffffffff, 0x00090008,
1102         0x3c098, 0xffffffff, 0x00010000,
1103         0x3c09c, 0xffffffff, 0x00030002,
1104         0x3c0a0, 0xffffffff, 0x00040007,
1105         0x3c0a4, 0xffffffff, 0x00060005,
1106         0x3c0a8, 0xffffffff, 0x00090008,
1107         0x3c000, 0xffffffff, 0x96e00200,
1108         0x8708, 0xffffffff, 0x00900100,
1109         0xc424, 0xffffffff, 0x0020003f,
1110         0x38, 0xffffffff, 0x0140001c,
1111         0x3c, 0x000f0000, 0x000f0000,
1112         0x220, 0xffffffff, 0xC060000C,
1113         0x224, 0xc0000fff, 0x00000100,
1114         0xf90, 0xffffffff, 0x00000100,
1115         0xf98, 0x00000101, 0x00000000,
1116         0x20a8, 0xffffffff, 0x00000104,
1117         0x55e4, 0xff000fff, 0x00000100,
1118         0x30cc, 0xc0000fff, 0x00000104,
1119         0xc1e4, 0x00000001, 0x00000001,
1120         0xd00c, 0xff000ff0, 0x00000100,
1121         0xd80c, 0xff000ff0, 0x00000100
1122 };
1123
1124 static const u32 spectre_golden_spm_registers[] =
1125 {
1126         0x30800, 0xe0ffffff, 0xe0000000
1127 };
1128
1129 static const u32 spectre_golden_common_registers[] =
1130 {
1131         0xc770, 0xffffffff, 0x00000800,
1132         0xc774, 0xffffffff, 0x00000800,
1133         0xc798, 0xffffffff, 0x00007fbf,
1134         0xc79c, 0xffffffff, 0x00007faf
1135 };
1136
1137 static const u32 spectre_golden_registers[] =
1138 {
1139         0x3c000, 0xffff1fff, 0x96940200,
1140         0x3c00c, 0xffff0001, 0xff000000,
1141         0x3c200, 0xfffc0fff, 0x00000100,
1142         0x6ed8, 0x00010101, 0x00010000,
1143         0x9834, 0xf00fffff, 0x00000400,
1144         0x9838, 0xfffffffc, 0x00020200,
1145         0x5bb0, 0x000000f0, 0x00000070,
1146         0x5bc0, 0xf0311fff, 0x80300000,
1147         0x98f8, 0x73773777, 0x12010001,
1148         0x9b7c, 0x00ff0000, 0x00fc0000,
1149         0x2f48, 0x73773777, 0x12010001,
1150         0x8a14, 0xf000003f, 0x00000007,
1151         0x8b24, 0xffffffff, 0x00ffffff,
1152         0x28350, 0x3f3f3fff, 0x00000082,
1153         0x28354, 0x0000003f, 0x00000000,
1154         0x3e78, 0x00000001, 0x00000002,
1155         0x913c, 0xffff03df, 0x00000004,
1156         0xc768, 0x00000008, 0x00000008,
1157         0x8c00, 0x000008ff, 0x00000800,
1158         0x9508, 0x00010000, 0x00010000,
1159         0xac0c, 0xffffffff, 0x54763210,
1160         0x214f8, 0x01ff01ff, 0x00000002,
1161         0x21498, 0x007ff800, 0x00200000,
1162         0x2015c, 0xffffffff, 0x00000f40,
1163         0x30934, 0xffffffff, 0x00000001
1164 };
1165
1166 static const u32 spectre_mgcg_cgcg_init[] =
1167 {
1168         0xc420, 0xffffffff, 0xfffffffc,
1169         0x30800, 0xffffffff, 0xe0000000,
1170         0x3c2a0, 0xffffffff, 0x00000100,
1171         0x3c208, 0xffffffff, 0x00000100,
1172         0x3c2c0, 0xffffffff, 0x00000100,
1173         0x3c2c8, 0xffffffff, 0x00000100,
1174         0x3c2c4, 0xffffffff, 0x00000100,
1175         0x55e4, 0xffffffff, 0x00600100,
1176         0x3c280, 0xffffffff, 0x00000100,
1177         0x3c214, 0xffffffff, 0x06000100,
1178         0x3c220, 0xffffffff, 0x00000100,
1179         0x3c218, 0xffffffff, 0x06000100,
1180         0x3c204, 0xffffffff, 0x00000100,
1181         0x3c2e0, 0xffffffff, 0x00000100,
1182         0x3c224, 0xffffffff, 0x00000100,
1183         0x3c200, 0xffffffff, 0x00000100,
1184         0x3c230, 0xffffffff, 0x00000100,
1185         0x3c234, 0xffffffff, 0x00000100,
1186         0x3c250, 0xffffffff, 0x00000100,
1187         0x3c254, 0xffffffff, 0x00000100,
1188         0x3c258, 0xffffffff, 0x00000100,
1189         0x3c25c, 0xffffffff, 0x00000100,
1190         0x3c260, 0xffffffff, 0x00000100,
1191         0x3c27c, 0xffffffff, 0x00000100,
1192         0x3c278, 0xffffffff, 0x00000100,
1193         0x3c210, 0xffffffff, 0x06000100,
1194         0x3c290, 0xffffffff, 0x00000100,
1195         0x3c274, 0xffffffff, 0x00000100,
1196         0x3c2b4, 0xffffffff, 0x00000100,
1197         0x3c2b0, 0xffffffff, 0x00000100,
1198         0x3c270, 0xffffffff, 0x00000100,
1199         0x30800, 0xffffffff, 0xe0000000,
1200         0x3c020, 0xffffffff, 0x00010000,
1201         0x3c024, 0xffffffff, 0x00030002,
1202         0x3c028, 0xffffffff, 0x00040007,
1203         0x3c02c, 0xffffffff, 0x00060005,
1204         0x3c030, 0xffffffff, 0x00090008,
1205         0x3c034, 0xffffffff, 0x00010000,
1206         0x3c038, 0xffffffff, 0x00030002,
1207         0x3c03c, 0xffffffff, 0x00040007,
1208         0x3c040, 0xffffffff, 0x00060005,
1209         0x3c044, 0xffffffff, 0x00090008,
1210         0x3c048, 0xffffffff, 0x00010000,
1211         0x3c04c, 0xffffffff, 0x00030002,
1212         0x3c050, 0xffffffff, 0x00040007,
1213         0x3c054, 0xffffffff, 0x00060005,
1214         0x3c058, 0xffffffff, 0x00090008,
1215         0x3c05c, 0xffffffff, 0x00010000,
1216         0x3c060, 0xffffffff, 0x00030002,
1217         0x3c064, 0xffffffff, 0x00040007,
1218         0x3c068, 0xffffffff, 0x00060005,
1219         0x3c06c, 0xffffffff, 0x00090008,
1220         0x3c070, 0xffffffff, 0x00010000,
1221         0x3c074, 0xffffffff, 0x00030002,
1222         0x3c078, 0xffffffff, 0x00040007,
1223         0x3c07c, 0xffffffff, 0x00060005,
1224         0x3c080, 0xffffffff, 0x00090008,
1225         0x3c084, 0xffffffff, 0x00010000,
1226         0x3c088, 0xffffffff, 0x00030002,
1227         0x3c08c, 0xffffffff, 0x00040007,
1228         0x3c090, 0xffffffff, 0x00060005,
1229         0x3c094, 0xffffffff, 0x00090008,
1230         0x3c098, 0xffffffff, 0x00010000,
1231         0x3c09c, 0xffffffff, 0x00030002,
1232         0x3c0a0, 0xffffffff, 0x00040007,
1233         0x3c0a4, 0xffffffff, 0x00060005,
1234         0x3c0a8, 0xffffffff, 0x00090008,
1235         0x3c0ac, 0xffffffff, 0x00010000,
1236         0x3c0b0, 0xffffffff, 0x00030002,
1237         0x3c0b4, 0xffffffff, 0x00040007,
1238         0x3c0b8, 0xffffffff, 0x00060005,
1239         0x3c0bc, 0xffffffff, 0x00090008,
1240         0x3c000, 0xffffffff, 0x96e00200,
1241         0x8708, 0xffffffff, 0x00900100,
1242         0xc424, 0xffffffff, 0x0020003f,
1243         0x38, 0xffffffff, 0x0140001c,
1244         0x3c, 0x000f0000, 0x000f0000,
1245         0x220, 0xffffffff, 0xC060000C,
1246         0x224, 0xc0000fff, 0x00000100,
1247         0xf90, 0xffffffff, 0x00000100,
1248         0xf98, 0x00000101, 0x00000000,
1249         0x20a8, 0xffffffff, 0x00000104,
1250         0x55e4, 0xff000fff, 0x00000100,
1251         0x30cc, 0xc0000fff, 0x00000104,
1252         0xc1e4, 0x00000001, 0x00000001,
1253         0xd00c, 0xff000ff0, 0x00000100,
1254         0xd80c, 0xff000ff0, 0x00000100
1255 };
1256
1257 static const u32 kalindi_golden_spm_registers[] =
1258 {
1259         0x30800, 0xe0ffffff, 0xe0000000
1260 };
1261
1262 static const u32 kalindi_golden_common_registers[] =
1263 {
1264         0xc770, 0xffffffff, 0x00000800,
1265         0xc774, 0xffffffff, 0x00000800,
1266         0xc798, 0xffffffff, 0x00007fbf,
1267         0xc79c, 0xffffffff, 0x00007faf
1268 };
1269
1270 static const u32 kalindi_golden_registers[] =
1271 {
1272         0x3c000, 0xffffdfff, 0x6e944040,
1273         0x55e4, 0xff607fff, 0xfc000100,
1274         0x3c220, 0xff000fff, 0x00000100,
1275         0x3c224, 0xff000fff, 0x00000100,
1276         0x3c200, 0xfffc0fff, 0x00000100,
1277         0x6ed8, 0x00010101, 0x00010000,
1278         0x9830, 0xffffffff, 0x00000000,
1279         0x9834, 0xf00fffff, 0x00000400,
1280         0x5bb0, 0x000000f0, 0x00000070,
1281         0x5bc0, 0xf0311fff, 0x80300000,
1282         0x98f8, 0x73773777, 0x12010001,
1283         0x98fc, 0xffffffff, 0x00000010,
1284         0x9b7c, 0x00ff0000, 0x00fc0000,
1285         0x8030, 0x00001f0f, 0x0000100a,
1286         0x2f48, 0x73773777, 0x12010001,
1287         0x2408, 0x000fffff, 0x000c007f,
1288         0x8a14, 0xf000003f, 0x00000007,
1289         0x8b24, 0x3fff3fff, 0x00ffcfff,
1290         0x30a04, 0x0000ff0f, 0x00000000,
1291         0x28a4c, 0x07ffffff, 0x06000000,
1292         0x4d8, 0x00000fff, 0x00000100,
1293         0x3e78, 0x00000001, 0x00000002,
1294         0xc768, 0x00000008, 0x00000008,
1295         0x8c00, 0x000000ff, 0x00000003,
1296         0x214f8, 0x01ff01ff, 0x00000002,
1297         0x21498, 0x007ff800, 0x00200000,
1298         0x2015c, 0xffffffff, 0x00000f40,
1299         0x88c4, 0x001f3ae3, 0x00000082,
1300         0x88d4, 0x0000001f, 0x00000010,
1301         0x30934, 0xffffffff, 0x00000000
1302 };
1303
1304 static const u32 kalindi_mgcg_cgcg_init[] =
1305 {
1306         0xc420, 0xffffffff, 0xfffffffc,
1307         0x30800, 0xffffffff, 0xe0000000,
1308         0x3c2a0, 0xffffffff, 0x00000100,
1309         0x3c208, 0xffffffff, 0x00000100,
1310         0x3c2c0, 0xffffffff, 0x00000100,
1311         0x3c2c8, 0xffffffff, 0x00000100,
1312         0x3c2c4, 0xffffffff, 0x00000100,
1313         0x55e4, 0xffffffff, 0x00600100,
1314         0x3c280, 0xffffffff, 0x00000100,
1315         0x3c214, 0xffffffff, 0x06000100,
1316         0x3c220, 0xffffffff, 0x00000100,
1317         0x3c218, 0xffffffff, 0x06000100,
1318         0x3c204, 0xffffffff, 0x00000100,
1319         0x3c2e0, 0xffffffff, 0x00000100,
1320         0x3c224, 0xffffffff, 0x00000100,
1321         0x3c200, 0xffffffff, 0x00000100,
1322         0x3c230, 0xffffffff, 0x00000100,
1323         0x3c234, 0xffffffff, 0x00000100,
1324         0x3c250, 0xffffffff, 0x00000100,
1325         0x3c254, 0xffffffff, 0x00000100,
1326         0x3c258, 0xffffffff, 0x00000100,
1327         0x3c25c, 0xffffffff, 0x00000100,
1328         0x3c260, 0xffffffff, 0x00000100,
1329         0x3c27c, 0xffffffff, 0x00000100,
1330         0x3c278, 0xffffffff, 0x00000100,
1331         0x3c210, 0xffffffff, 0x06000100,
1332         0x3c290, 0xffffffff, 0x00000100,
1333         0x3c274, 0xffffffff, 0x00000100,
1334         0x3c2b4, 0xffffffff, 0x00000100,
1335         0x3c2b0, 0xffffffff, 0x00000100,
1336         0x3c270, 0xffffffff, 0x00000100,
1337         0x30800, 0xffffffff, 0xe0000000,
1338         0x3c020, 0xffffffff, 0x00010000,
1339         0x3c024, 0xffffffff, 0x00030002,
1340         0x3c028, 0xffffffff, 0x00040007,
1341         0x3c02c, 0xffffffff, 0x00060005,
1342         0x3c030, 0xffffffff, 0x00090008,
1343         0x3c034, 0xffffffff, 0x00010000,
1344         0x3c038, 0xffffffff, 0x00030002,
1345         0x3c03c, 0xffffffff, 0x00040007,
1346         0x3c040, 0xffffffff, 0x00060005,
1347         0x3c044, 0xffffffff, 0x00090008,
1348         0x3c000, 0xffffffff, 0x96e00200,
1349         0x8708, 0xffffffff, 0x00900100,
1350         0xc424, 0xffffffff, 0x0020003f,
1351         0x38, 0xffffffff, 0x0140001c,
1352         0x3c, 0x000f0000, 0x000f0000,
1353         0x220, 0xffffffff, 0xC060000C,
1354         0x224, 0xc0000fff, 0x00000100,
1355         0x20a8, 0xffffffff, 0x00000104,
1356         0x55e4, 0xff000fff, 0x00000100,
1357         0x30cc, 0xc0000fff, 0x00000104,
1358         0xc1e4, 0x00000001, 0x00000001,
1359         0xd00c, 0xff000ff0, 0x00000100,
1360         0xd80c, 0xff000ff0, 0x00000100
1361 };
1362
1363 static const u32 hawaii_golden_spm_registers[] =
1364 {
1365         0x30800, 0xe0ffffff, 0xe0000000
1366 };
1367
1368 static const u32 hawaii_golden_common_registers[] =
1369 {
1370         0x30800, 0xffffffff, 0xe0000000,
1371         0x28350, 0xffffffff, 0x3a00161a,
1372         0x28354, 0xffffffff, 0x0000002e,
1373         0x9a10, 0xffffffff, 0x00018208,
1374         0x98f8, 0xffffffff, 0x12011003
1375 };
1376
1377 static const u32 hawaii_golden_registers[] =
1378 {
1379         0x3354, 0x00000333, 0x00000333,
1380         0x9a10, 0x00010000, 0x00058208,
1381         0x9830, 0xffffffff, 0x00000000,
1382         0x9834, 0xf00fffff, 0x00000400,
1383         0x9838, 0x0002021c, 0x00020200,
1384         0xc78, 0x00000080, 0x00000000,
1385         0x5bb0, 0x000000f0, 0x00000070,
1386         0x5bc0, 0xf0311fff, 0x80300000,
1387         0x350c, 0x00810000, 0x408af000,
1388         0x7030, 0x31000111, 0x00000011,
1389         0x2f48, 0x73773777, 0x12010001,
1390         0x2120, 0x0000007f, 0x0000001b,
1391         0x21dc, 0x00007fb6, 0x00002191,
1392         0x3628, 0x0000003f, 0x0000000a,
1393         0x362c, 0x0000003f, 0x0000000a,
1394         0x2ae4, 0x00073ffe, 0x000022a2,
1395         0x240c, 0x000007ff, 0x00000000,
1396         0x8bf0, 0x00002001, 0x00000001,
1397         0x8b24, 0xffffffff, 0x00ffffff,
1398         0x30a04, 0x0000ff0f, 0x00000000,
1399         0x28a4c, 0x07ffffff, 0x06000000,
1400         0x3e78, 0x00000001, 0x00000002,
1401         0xc768, 0x00000008, 0x00000008,
1402         0xc770, 0x00000f00, 0x00000800,
1403         0xc774, 0x00000f00, 0x00000800,
1404         0xc798, 0x00ffffff, 0x00ff7fbf,
1405         0xc79c, 0x00ffffff, 0x00ff7faf,
1406         0x8c00, 0x000000ff, 0x00000800,
1407         0xe40, 0x00001fff, 0x00001fff,
1408         0x9060, 0x0000007f, 0x00000020,
1409         0x9508, 0x00010000, 0x00010000,
1410         0xae00, 0x00100000, 0x000ff07c,
1411         0xac14, 0x000003ff, 0x0000000f,
1412         0xac10, 0xffffffff, 0x7564fdec,
1413         0xac0c, 0xffffffff, 0x3120b9a8,
1414         0xac08, 0x20000000, 0x0f9c0000
1415 };
1416
1417 static const u32 hawaii_mgcg_cgcg_init[] =
1418 {
1419         0xc420, 0xffffffff, 0xfffffffd,
1420         0x30800, 0xffffffff, 0xe0000000,
1421         0x3c2a0, 0xffffffff, 0x00000100,
1422         0x3c208, 0xffffffff, 0x00000100,
1423         0x3c2c0, 0xffffffff, 0x00000100,
1424         0x3c2c8, 0xffffffff, 0x00000100,
1425         0x3c2c4, 0xffffffff, 0x00000100,
1426         0x55e4, 0xffffffff, 0x00200100,
1427         0x3c280, 0xffffffff, 0x00000100,
1428         0x3c214, 0xffffffff, 0x06000100,
1429         0x3c220, 0xffffffff, 0x00000100,
1430         0x3c218, 0xffffffff, 0x06000100,
1431         0x3c204, 0xffffffff, 0x00000100,
1432         0x3c2e0, 0xffffffff, 0x00000100,
1433         0x3c224, 0xffffffff, 0x00000100,
1434         0x3c200, 0xffffffff, 0x00000100,
1435         0x3c230, 0xffffffff, 0x00000100,
1436         0x3c234, 0xffffffff, 0x00000100,
1437         0x3c250, 0xffffffff, 0x00000100,
1438         0x3c254, 0xffffffff, 0x00000100,
1439         0x3c258, 0xffffffff, 0x00000100,
1440         0x3c25c, 0xffffffff, 0x00000100,
1441         0x3c260, 0xffffffff, 0x00000100,
1442         0x3c27c, 0xffffffff, 0x00000100,
1443         0x3c278, 0xffffffff, 0x00000100,
1444         0x3c210, 0xffffffff, 0x06000100,
1445         0x3c290, 0xffffffff, 0x00000100,
1446         0x3c274, 0xffffffff, 0x00000100,
1447         0x3c2b4, 0xffffffff, 0x00000100,
1448         0x3c2b0, 0xffffffff, 0x00000100,
1449         0x3c270, 0xffffffff, 0x00000100,
1450         0x30800, 0xffffffff, 0xe0000000,
1451         0x3c020, 0xffffffff, 0x00010000,
1452         0x3c024, 0xffffffff, 0x00030002,
1453         0x3c028, 0xffffffff, 0x00040007,
1454         0x3c02c, 0xffffffff, 0x00060005,
1455         0x3c030, 0xffffffff, 0x00090008,
1456         0x3c034, 0xffffffff, 0x00010000,
1457         0x3c038, 0xffffffff, 0x00030002,
1458         0x3c03c, 0xffffffff, 0x00040007,
1459         0x3c040, 0xffffffff, 0x00060005,
1460         0x3c044, 0xffffffff, 0x00090008,
1461         0x3c048, 0xffffffff, 0x00010000,
1462         0x3c04c, 0xffffffff, 0x00030002,
1463         0x3c050, 0xffffffff, 0x00040007,
1464         0x3c054, 0xffffffff, 0x00060005,
1465         0x3c058, 0xffffffff, 0x00090008,
1466         0x3c05c, 0xffffffff, 0x00010000,
1467         0x3c060, 0xffffffff, 0x00030002,
1468         0x3c064, 0xffffffff, 0x00040007,
1469         0x3c068, 0xffffffff, 0x00060005,
1470         0x3c06c, 0xffffffff, 0x00090008,
1471         0x3c070, 0xffffffff, 0x00010000,
1472         0x3c074, 0xffffffff, 0x00030002,
1473         0x3c078, 0xffffffff, 0x00040007,
1474         0x3c07c, 0xffffffff, 0x00060005,
1475         0x3c080, 0xffffffff, 0x00090008,
1476         0x3c084, 0xffffffff, 0x00010000,
1477         0x3c088, 0xffffffff, 0x00030002,
1478         0x3c08c, 0xffffffff, 0x00040007,
1479         0x3c090, 0xffffffff, 0x00060005,
1480         0x3c094, 0xffffffff, 0x00090008,
1481         0x3c098, 0xffffffff, 0x00010000,
1482         0x3c09c, 0xffffffff, 0x00030002,
1483         0x3c0a0, 0xffffffff, 0x00040007,
1484         0x3c0a4, 0xffffffff, 0x00060005,
1485         0x3c0a8, 0xffffffff, 0x00090008,
1486         0x3c0ac, 0xffffffff, 0x00010000,
1487         0x3c0b0, 0xffffffff, 0x00030002,
1488         0x3c0b4, 0xffffffff, 0x00040007,
1489         0x3c0b8, 0xffffffff, 0x00060005,
1490         0x3c0bc, 0xffffffff, 0x00090008,
1491         0x3c0c0, 0xffffffff, 0x00010000,
1492         0x3c0c4, 0xffffffff, 0x00030002,
1493         0x3c0c8, 0xffffffff, 0x00040007,
1494         0x3c0cc, 0xffffffff, 0x00060005,
1495         0x3c0d0, 0xffffffff, 0x00090008,
1496         0x3c0d4, 0xffffffff, 0x00010000,
1497         0x3c0d8, 0xffffffff, 0x00030002,
1498         0x3c0dc, 0xffffffff, 0x00040007,
1499         0x3c0e0, 0xffffffff, 0x00060005,
1500         0x3c0e4, 0xffffffff, 0x00090008,
1501         0x3c0e8, 0xffffffff, 0x00010000,
1502         0x3c0ec, 0xffffffff, 0x00030002,
1503         0x3c0f0, 0xffffffff, 0x00040007,
1504         0x3c0f4, 0xffffffff, 0x00060005,
1505         0x3c0f8, 0xffffffff, 0x00090008,
1506         0xc318, 0xffffffff, 0x00020200,
1507         0x3350, 0xffffffff, 0x00000200,
1508         0x15c0, 0xffffffff, 0x00000400,
1509         0x55e8, 0xffffffff, 0x00000000,
1510         0x2f50, 0xffffffff, 0x00000902,
1511         0x3c000, 0xffffffff, 0x96940200,
1512         0x8708, 0xffffffff, 0x00900100,
1513         0xc424, 0xffffffff, 0x0020003f,
1514         0x38, 0xffffffff, 0x0140001c,
1515         0x3c, 0x000f0000, 0x000f0000,
1516         0x220, 0xffffffff, 0xc060000c,
1517         0x224, 0xc0000fff, 0x00000100,
1518         0xf90, 0xffffffff, 0x00000100,
1519         0xf98, 0x00000101, 0x00000000,
1520         0x20a8, 0xffffffff, 0x00000104,
1521         0x55e4, 0xff000fff, 0x00000100,
1522         0x30cc, 0xc0000fff, 0x00000104,
1523         0xc1e4, 0x00000001, 0x00000001,
1524         0xd00c, 0xff000ff0, 0x00000100,
1525         0xd80c, 0xff000ff0, 0x00000100
1526 };
1527
1528 static const u32 godavari_golden_registers[] =
1529 {
1530         0x55e4, 0xff607fff, 0xfc000100,
1531         0x6ed8, 0x00010101, 0x00010000,
1532         0x9830, 0xffffffff, 0x00000000,
1533         0x98302, 0xf00fffff, 0x00000400,
1534         0x6130, 0xffffffff, 0x00010000,
1535         0x5bb0, 0x000000f0, 0x00000070,
1536         0x5bc0, 0xf0311fff, 0x80300000,
1537         0x98f8, 0x73773777, 0x12010001,
1538         0x98fc, 0xffffffff, 0x00000010,
1539         0x8030, 0x00001f0f, 0x0000100a,
1540         0x2f48, 0x73773777, 0x12010001,
1541         0x2408, 0x000fffff, 0x000c007f,
1542         0x8a14, 0xf000003f, 0x00000007,
1543         0x8b24, 0xffffffff, 0x00ff0fff,
1544         0x30a04, 0x0000ff0f, 0x00000000,
1545         0x28a4c, 0x07ffffff, 0x06000000,
1546         0x4d8, 0x00000fff, 0x00000100,
1547         0xd014, 0x00010000, 0x00810001,
1548         0xd814, 0x00010000, 0x00810001,
1549         0x3e78, 0x00000001, 0x00000002,
1550         0xc768, 0x00000008, 0x00000008,
1551         0xc770, 0x00000f00, 0x00000800,
1552         0xc774, 0x00000f00, 0x00000800,
1553         0xc798, 0x00ffffff, 0x00ff7fbf,
1554         0xc79c, 0x00ffffff, 0x00ff7faf,
1555         0x8c00, 0x000000ff, 0x00000001,
1556         0x214f8, 0x01ff01ff, 0x00000002,
1557         0x21498, 0x007ff800, 0x00200000,
1558         0x2015c, 0xffffffff, 0x00000f40,
1559         0x88c4, 0x001f3ae3, 0x00000082,
1560         0x88d4, 0x0000001f, 0x00000010,
1561         0x30934, 0xffffffff, 0x00000000
1562 };
1563
1564
1565 static void cik_init_golden_registers(struct radeon_device *rdev)
1566 {
1567         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1568         mutex_lock(&rdev->grbm_idx_mutex);
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 radeon_program_register_sequence(rdev,
1572                                                  bonaire_mgcg_cgcg_init,
1573                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1574                 radeon_program_register_sequence(rdev,
1575                                                  bonaire_golden_registers,
1576                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  bonaire_golden_common_registers,
1579                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1580                 radeon_program_register_sequence(rdev,
1581                                                  bonaire_golden_spm_registers,
1582                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1583                 break;
1584         case CHIP_KABINI:
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_mgcg_cgcg_init,
1587                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_common_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1594                 radeon_program_register_sequence(rdev,
1595                                                  kalindi_golden_spm_registers,
1596                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1597                 break;
1598         case CHIP_MULLINS:
1599                 radeon_program_register_sequence(rdev,
1600                                                  kalindi_mgcg_cgcg_init,
1601                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1602                 radeon_program_register_sequence(rdev,
1603                                                  godavari_golden_registers,
1604                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  kalindi_golden_common_registers,
1607                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1608                 radeon_program_register_sequence(rdev,
1609                                                  kalindi_golden_spm_registers,
1610                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1611                 break;
1612         case CHIP_KAVERI:
1613                 radeon_program_register_sequence(rdev,
1614                                                  spectre_mgcg_cgcg_init,
1615                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1616                 radeon_program_register_sequence(rdev,
1617                                                  spectre_golden_registers,
1618                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  spectre_golden_common_registers,
1621                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1622                 radeon_program_register_sequence(rdev,
1623                                                  spectre_golden_spm_registers,
1624                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1625                 break;
1626         case CHIP_HAWAII:
1627                 radeon_program_register_sequence(rdev,
1628                                                  hawaii_mgcg_cgcg_init,
1629                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1630                 radeon_program_register_sequence(rdev,
1631                                                  hawaii_golden_registers,
1632                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  hawaii_golden_common_registers,
1635                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  hawaii_golden_spm_registers,
1638                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1639                 break;
1640         default:
1641                 break;
1642         }
1643         mutex_unlock(&rdev->grbm_idx_mutex);
1644 }
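/*
 * Note on the golden register tables consumed above: each table is a flat
 * array of { reg_offset, and_mask, or_mask } triplets.  A minimal sketch of
 * the read-modify-write implied by one triplet, assuming the semantics of
 * radeon_program_register_sequence() (illustration only, not driver code):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		u32 reg = regs[i + 0], and_mask = regs[i + 1], or_mask = regs[i + 2];
 *		u32 tmp;
 *
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 *
 * A mask of 0xffffffff therefore means "overwrite the whole register";
 * anything narrower only touches the masked bits.
 */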
1645
1646 /**
1647  * cik_get_xclk - get the xclk
1648  *
1649  * @rdev: radeon_device pointer
1650  *
1651  * Returns the reference clock used by the gfx engine
1652  * (CIK).
1653  */
1654 u32 cik_get_xclk(struct radeon_device *rdev)
1655 {
1656         u32 reference_clock = rdev->clock.spll.reference_freq;
1657
1658         if (rdev->flags & RADEON_IS_IGP) {
1659                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660                         return reference_clock / 2;
1661         } else {
1662                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663                         return reference_clock / 4;
1664         }
1665         return reference_clock;
1666 }
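/*
 * Illustrative usage only (not driver code): the value returned above follows
 * the ATOM clock-info convention, typically units of 10 kHz, so a caller that
 * wants kHz or MHz scales it accordingly:
 *
 *	u32 xclk = cik_get_xclk(rdev);
 *	u32 xclk_khz = xclk * 10;
 *	u32 xclk_mhz = xclk / 100;
 */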
1667
1668 /**
1669  * cik_mm_rdoorbell - read a doorbell dword
1670  *
1671  * @rdev: radeon_device pointer
1672  * @index: doorbell index
1673  *
1674  * Returns the value in the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 return readl(rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683                 return 0;
1684         }
1685 }
1686
1687 /**
1688  * cik_mm_wdoorbell - write a doorbell dword
1689  *
1690  * @rdev: radeon_device pointer
1691  * @index: doorbell index
1692  * @v: value to write
1693  *
1694  * Writes @v to the doorbell aperture at the
1695  * requested doorbell index (CIK).
1696  */
1697 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698 {
1699         if (index < rdev->doorbell.num_doorbells) {
1700                 writel(v, rdev->doorbell.ptr + index);
1701         } else {
1702                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703         }
1704 }
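/*
 * Illustrative usage only (not driver code): compute/SDMA rings that own a
 * doorbell slot typically update their write pointer and then ring the
 * doorbell so the hardware picks up the new work; the index comes from
 * whatever ring setup allocated it:
 *
 *	ring->wptr = new_wptr;
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * and the read side mirrors it, e.g. when fetching the current value back:
 *
 *	wptr = cik_mm_rdoorbell(rdev, ring->doorbell_index);
 */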
1705
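/*
 * The tables below are { MC_SEQ_IO_DEBUG index, data } pairs used by the
 * legacy (headerless) MC firmware path in ci_mc_load_microcode(); new-style
 * firmware images carry an equivalent io-debug array in their header instead.
 */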
1706 #define BONAIRE_IO_MC_REGS_SIZE 36
1707
1708 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1709 {
1710         {0x00000070, 0x04400000},
1711         {0x00000071, 0x80c01803},
1712         {0x00000072, 0x00004004},
1713         {0x00000073, 0x00000100},
1714         {0x00000074, 0x00ff0000},
1715         {0x00000075, 0x34000000},
1716         {0x00000076, 0x08000014},
1717         {0x00000077, 0x00cc08ec},
1718         {0x00000078, 0x00000400},
1719         {0x00000079, 0x00000000},
1720         {0x0000007a, 0x04090000},
1721         {0x0000007c, 0x00000000},
1722         {0x0000007e, 0x4408a8e8},
1723         {0x0000007f, 0x00000304},
1724         {0x00000080, 0x00000000},
1725         {0x00000082, 0x00000001},
1726         {0x00000083, 0x00000002},
1727         {0x00000084, 0xf3e4f400},
1728         {0x00000085, 0x052024e3},
1729         {0x00000087, 0x00000000},
1730         {0x00000088, 0x01000000},
1731         {0x0000008a, 0x1c0a0000},
1732         {0x0000008b, 0xff010000},
1733         {0x0000008d, 0xffffefff},
1734         {0x0000008e, 0xfff3efff},
1735         {0x0000008f, 0xfff3efbf},
1736         {0x00000092, 0xf7ffffff},
1737         {0x00000093, 0xffffff7f},
1738         {0x00000095, 0x00101101},
1739         {0x00000096, 0x00000fff},
1740         {0x00000097, 0x00116fff},
1741         {0x00000098, 0x60010000},
1742         {0x00000099, 0x10010000},
1743         {0x0000009a, 0x00006000},
1744         {0x0000009b, 0x00001000},
1745         {0x0000009f, 0x00b48000}
1746 };
1747
1748 #define HAWAII_IO_MC_REGS_SIZE 22
1749
1750 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1751 {
1752         {0x0000007d, 0x40000000},
1753         {0x0000007e, 0x40180304},
1754         {0x0000007f, 0x0000ff00},
1755         {0x00000081, 0x00000000},
1756         {0x00000083, 0x00000800},
1757         {0x00000086, 0x00000000},
1758         {0x00000087, 0x00000100},
1759         {0x00000088, 0x00020100},
1760         {0x00000089, 0x00000000},
1761         {0x0000008b, 0x00040000},
1762         {0x0000008c, 0x00000100},
1763         {0x0000008e, 0xff010000},
1764         {0x00000090, 0xffffefff},
1765         {0x00000091, 0xfff3efff},
1766         {0x00000092, 0xfff3efbf},
1767         {0x00000093, 0xf7ffffff},
1768         {0x00000094, 0xffffff7f},
1769         {0x00000095, 0x00000fff},
1770         {0x00000096, 0x00116fff},
1771         {0x00000097, 0x60010000},
1772         {0x00000098, 0x10010000},
1773         {0x0000009f, 0x00c79000}
1774 };
1775
1776
1777 /**
1778  * cik_srbm_select - select specific register instances
1779  *
1780  * @rdev: radeon_device pointer
1781  * @me: selected ME (micro engine)
1782  * @pipe: pipe
1783  * @queue: queue
1784  * @vmid: VMID
1785  *
1786  * Switches the currently active register instances.  Some
1787  * registers are instanced per VMID, others are instanced per
1788  * me/pipe/queue combination.
1789  */
1790 static void cik_srbm_select(struct radeon_device *rdev,
1791                             u32 me, u32 pipe, u32 queue, u32 vmid)
1792 {
1793         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794                              MEID(me & 0x3) |
1795                              VMID(vmid & 0xf) |
1796                              QUEUEID(queue & 0x7));
1797         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798 }
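/*
 * Typical call pattern (illustrative): accesses to instanced registers are
 * bracketed by a select/deselect pair, normally while holding
 * rdev->srbm_mutex so concurrent users cannot change the selection:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program the per-instance registers (e.g. CP_HQD_*, SH_MEM_*) ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */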
1799
1800 /* ucode loading */
1801 /**
1802  * ci_mc_load_microcode - load MC ucode into the hw
1803  *
1804  * @rdev: radeon_device pointer
1805  *
1806  * Load the GDDR MC ucode into the hw (CIK).
1807  * Returns 0 on success, error on failure.
1808  */
1809 int ci_mc_load_microcode(struct radeon_device *rdev)
1810 {
1811         const __be32 *fw_data = NULL;
1812         const __le32 *new_fw_data = NULL;
1813         u32 running, blackout = 0, tmp;
1814         u32 *io_mc_regs = NULL;
1815         const __le32 *new_io_mc_regs = NULL;
1816         int i, regs_size, ucode_size;
1817
1818         if (!rdev->mc_fw)
1819                 return -EINVAL;
1820
1821         if (rdev->new_fw) {
1822                 const struct mc_firmware_header_v1_0 *hdr =
1823                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824
1825                 radeon_ucode_print_mc_hdr(&hdr->header);
1826
1827                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828                 new_io_mc_regs = (const __le32 *)
1829                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831                 new_fw_data = (const __le32 *)
1832                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833         } else {
1834                 ucode_size = rdev->mc_fw->size / 4;
1835
1836                 switch (rdev->family) {
1837                 case CHIP_BONAIRE:
1838                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840                         break;
1841                 case CHIP_HAWAII:
1842                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1844                         break;
1845                 default:
1846                         return -EINVAL;
1847                 }
1848                 fw_data = (const __be32 *)rdev->mc_fw->data;
1849         }
1850
1851         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852
1853         if (running == 0) {
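                /*
                 * Note: 'running' is zero in this branch, so the blackout
                 * bracket below (and the matching restore at the end of the
                 * block) never executes; blackout is only meaningful while
                 * the MC is already running.
                 */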
1854                 if (running) {
1855                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857                 }
1858
1859                 /* reset the engine and set to writable */
1860                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862
1863                 /* load mc io regs */
1864                 for (i = 0; i < regs_size; i++) {
1865                         if (rdev->new_fw) {
1866                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868                         } else {
1869                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871                         }
1872                 }
1873
1874                 tmp = RREG32(MC_SEQ_MISC0);
1875                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880                 }
1881
1882                 /* load the MC ucode */
1883                 for (i = 0; i < ucode_size; i++) {
1884                         if (rdev->new_fw)
1885                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886                         else
1887                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888                 }
1889
1890                 /* put the engine back into the active state */
1891                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894
1895                 /* wait for training to complete */
1896                 for (i = 0; i < rdev->usec_timeout; i++) {
1897                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898                                 break;
1899                         udelay(1);
1900                 }
1901                 for (i = 0; i < rdev->usec_timeout; i++) {
1902                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903                                 break;
1904                         udelay(1);
1905                 }
1906
1907                 if (running)
1908                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909         }
1910
1911         return 0;
1912 }
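/*
 * Illustrative caller sketch (not a new code path): during hw init the MC
 * ucode is only loaded on dGPUs, and a failure is fatal for bring-up,
 * roughly:
 *
 *	if (!(rdev->flags & RADEON_IS_IGP)) {
 *		r = ci_mc_load_microcode(rdev);
 *		if (r) {
 *			DRM_ERROR("Failed to load MC firmware!\n");
 *			return r;
 *		}
 *	}
 */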
1913
1914 /**
1915  * cik_init_microcode - load ucode images from disk
1916  *
1917  * @rdev: radeon_device pointer
1918  *
1919  * Use the firmware interface to load the ucode images into
1920  * the driver (not loaded into hw).
1921  * Returns 0 on success, error on failure.
1922  */
1923 static int cik_init_microcode(struct radeon_device *rdev)
1924 {
1925         const char *chip_name;
1926         const char *new_chip_name;
1927         size_t pfp_req_size, me_req_size, ce_req_size,
1928                 mec_req_size, rlc_req_size, mc_req_size = 0,
1929                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930         char fw_name[30];
1931         int new_fw = 0;
1932         int err;
1933         int num_fw;
1934
1935         DRM_DEBUG("\n");
1936
1937         switch (rdev->family) {
1938         case CHIP_BONAIRE:
1939                 chip_name = "BONAIRE";
1940                 new_chip_name = "bonaire";
1941                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1943                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950                 num_fw = 8;
1951                 break;
1952         case CHIP_HAWAII:
1953                 chip_name = "HAWAII";
1954                 new_chip_name = "hawaii";
1955                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1957                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964                 num_fw = 8;
1965                 break;
1966         case CHIP_KAVERI:
1967                 chip_name = "KAVERI";
1968                 new_chip_name = "kaveri";
1969                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1971                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975                 num_fw = 7;
1976                 break;
1977         case CHIP_KABINI:
1978                 chip_name = "KABINI";
1979                 new_chip_name = "kabini";
1980                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1982                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986                 num_fw = 6;
1987                 break;
1988         case CHIP_MULLINS:
1989                 chip_name = "MULLINS";
1990                 new_chip_name = "mullins";
1991                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1993                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 num_fw = 6;
1998                 break;
1999         default: BUG();
2000         }
2001
2002         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003
2004         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006         if (err) {
2007                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009                 if (err)
2010                         goto out;
2011                 if (rdev->pfp_fw->size != pfp_req_size) {
2012                         printk(KERN_ERR
2013                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014                                rdev->pfp_fw->size, fw_name);
2015                         err = -EINVAL;
2016                         goto out;
2017                 }
2018         } else {
2019                 err = radeon_ucode_validate(rdev->pfp_fw);
2020                 if (err) {
2021                         printk(KERN_ERR
2022                                "cik_fw: validation failed for firmware \"%s\"\n",
2023                                fw_name);
2024                         goto out;
2025                 } else {
2026                         new_fw++;
2027                 }
2028         }
2029
2030         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032         if (err) {
2033                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035                 if (err)
2036                         goto out;
2037                 if (rdev->me_fw->size != me_req_size) {
2038                         printk(KERN_ERR
2039                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040                                rdev->me_fw->size, fw_name);
2041                         err = -EINVAL;
2042                 }
2043         } else {
2044                 err = radeon_ucode_validate(rdev->me_fw);
2045                 if (err) {
2046                         printk(KERN_ERR
2047                                "cik_fw: validation failed for firmware \"%s\"\n",
2048                                fw_name);
2049                         goto out;
2050                 } else {
2051                         new_fw++;
2052                 }
2053         }
2054
2055         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057         if (err) {
2058                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060                 if (err)
2061                         goto out;
2062                 if (rdev->ce_fw->size != ce_req_size) {
2063                         printk(KERN_ERR
2064                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065                                rdev->ce_fw->size, fw_name);
2066                         err = -EINVAL;
2067                 }
2068         } else {
2069                 err = radeon_ucode_validate(rdev->ce_fw);
2070                 if (err) {
2071                         printk(KERN_ERR
2072                                "cik_fw: validation failed for firmware \"%s\"\n",
2073                                fw_name);
2074                         goto out;
2075                 } else {
2076                         new_fw++;
2077                 }
2078         }
2079
2080         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082         if (err) {
2083                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085                 if (err)
2086                         goto out;
2087                 if (rdev->mec_fw->size != mec_req_size) {
2088                         printk(KERN_ERR
2089                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090                                rdev->mec_fw->size, fw_name);
2091                         err = -EINVAL;
2092                 }
2093         } else {
2094                 err = radeon_ucode_validate(rdev->mec_fw);
2095                 if (err) {
2096                         printk(KERN_ERR
2097                                "cik_fw: validation failed for firmware \"%s\"\n",
2098                                fw_name);
2099                         goto out;
2100                 } else {
2101                         new_fw++;
2102                 }
2103         }
2104
2105         if (rdev->family == CHIP_KAVERI) {
2106                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108                 if (err) {
2109                         goto out;
2110                 } else {
2111                         err = radeon_ucode_validate(rdev->mec2_fw);
2112                         if (err) {
2113                                 goto out;
2114                         } else {
2115                                 new_fw++;
2116                         }
2117                 }
2118         }
2119
2120         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122         if (err) {
2123                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125                 if (err)
2126                         goto out;
2127                 if (rdev->rlc_fw->size != rlc_req_size) {
2128                         printk(KERN_ERR
2129                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130                                rdev->rlc_fw->size, fw_name);
2131                         err = -EINVAL;
2132                 }
2133         } else {
2134                 err = radeon_ucode_validate(rdev->rlc_fw);
2135                 if (err) {
2136                         printk(KERN_ERR
2137                                "cik_fw: validation failed for firmware \"%s\"\n",
2138                                fw_name);
2139                         goto out;
2140                 } else {
2141                         new_fw++;
2142                 }
2143         }
2144
2145         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147         if (err) {
2148                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150                 if (err)
2151                         goto out;
2152                 if (rdev->sdma_fw->size != sdma_req_size) {
2153                         printk(KERN_ERR
2154                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155                                rdev->sdma_fw->size, fw_name);
2156                         err = -EINVAL;
2157                 }
2158         } else {
2159                 err = radeon_ucode_validate(rdev->sdma_fw);
2160                 if (err) {
2161                         printk(KERN_ERR
2162                                "cik_fw: validation failed for firmware \"%s\"\n",
2163                                fw_name);
2164                         goto out;
2165                 } else {
2166                         new_fw++;
2167                 }
2168         }
2169
2170         /* No SMC, MC ucode on APUs */
2171         if (!(rdev->flags & RADEON_IS_IGP)) {
2172                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174                 if (err) {
2175                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177                         if (err) {
2178                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180                                 if (err)
2181                                         goto out;
2182                         }
2183                         if ((rdev->mc_fw->size != mc_req_size) &&
2184                             (rdev->mc_fw->size != mc2_req_size)){
2185                                 printk(KERN_ERR
2186                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187                                        rdev->mc_fw->size, fw_name);
2188                                 err = -EINVAL;
2189                         }
2190                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191                 } else {
2192                         err = radeon_ucode_validate(rdev->mc_fw);
2193                         if (err) {
2194                                 printk(KERN_ERR
2195                                        "cik_fw: validation failed for firmware \"%s\"\n",
2196                                        fw_name);
2197                                 goto out;
2198                         } else {
2199                                 new_fw++;
2200                         }
2201                 }
2202
2203                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205                 if (err) {
2206                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208                         if (err) {
2209                                 printk(KERN_ERR
2210                                        "smc: error loading firmware \"%s\"\n",
2211                                        fw_name);
2212                                 release_firmware(rdev->smc_fw);
2213                                 rdev->smc_fw = NULL;
2214                                 err = 0;
2215                         } else if (rdev->smc_fw->size != smc_req_size) {
2216                                 printk(KERN_ERR
2217                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218                                        rdev->smc_fw->size, fw_name);
2219                                 err = -EINVAL;
2220                         }
2221                 } else {
2222                         err = radeon_ucode_validate(rdev->smc_fw);
2223                         if (err) {
2224                                 printk(KERN_ERR
2225                                        "cik_fw: validation failed for firmware \"%s\"\n",
2226                                        fw_name);
2227                                 goto out;
2228                         } else {
2229                                 new_fw++;
2230                         }
2231                 }
2232         }
2233
2234         if (new_fw == 0) {
2235                 rdev->new_fw = false;
2236         } else if (new_fw < num_fw) {
2237                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238                 err = -EINVAL;
2239         } else {
2240                 rdev->new_fw = true;
2241         }
2242
2243 out:
2244         if (err) {
2245                 if (err != -EINVAL)
2246                         printk(KERN_ERR
2247                                "cik_cp: Failed to load firmware \"%s\"\n",
2248                                fw_name);
2249                 release_firmware(rdev->pfp_fw);
2250                 rdev->pfp_fw = NULL;
2251                 release_firmware(rdev->me_fw);
2252                 rdev->me_fw = NULL;
2253                 release_firmware(rdev->ce_fw);
2254                 rdev->ce_fw = NULL;
2255                 release_firmware(rdev->mec_fw);
2256                 rdev->mec_fw = NULL;
2257                 release_firmware(rdev->mec2_fw);
2258                 rdev->mec2_fw = NULL;
2259                 release_firmware(rdev->rlc_fw);
2260                 rdev->rlc_fw = NULL;
2261                 release_firmware(rdev->sdma_fw);
2262                 rdev->sdma_fw = NULL;
2263                 release_firmware(rdev->mc_fw);
2264                 rdev->mc_fw = NULL;
2265                 release_firmware(rdev->smc_fw);
2266                 rdev->smc_fw = NULL;
2267         }
2268         return err;
2269 }
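/*
 * The per-blob request pattern repeated above, distilled into a sketch.  The
 * helper below is hypothetical (illustration only, not part of the driver):
 * try the new lower-case name first and validate its header, otherwise fall
 * back to the legacy upper-case name and check the exact legacy size.
 *
 *	static int cik_request_one_fw(struct radeon_device *rdev,
 *				      const struct firmware **fw,
 *				      const char *new_name, const char *old_name,
 *				      size_t legacy_size, int *new_count)
 *	{
 *		int err = request_firmware(fw, new_name, rdev->dev);
 *
 *		if (!err) {
 *			err = radeon_ucode_validate(*fw);
 *			if (!err)
 *				(*new_count)++;
 *			return err;
 *		}
 *		err = request_firmware(fw, old_name, rdev->dev);
 *		if (err)
 *			return err;
 *		return ((*fw)->size == legacy_size) ? 0 : -EINVAL;
 *	}
 *
 * At the end, rdev->new_fw is only set when every requested blob was
 * new-style; mixing the two generations is rejected.
 */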
2270
2271 /*
2272  * Core functions
2273  */
2274 /**
2275  * cik_tiling_mode_table_init - init the hw tiling table
2276  *
2277  * @rdev: radeon_device pointer
2278  *
2279  * Starting with SI, the tiling setup is done globally in a
2280  * set of 32 tiling modes.  Rather than selecting each set of
2281  * parameters per surface as on older asics, we just select
2282  * which index in the tiling table we want to use, and the
2283  * surface uses those parameters (CIK).
2284  */
2285 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286 {
2287         const u32 num_tile_mode_states = 32;
2288         const u32 num_secondary_tile_mode_states = 16;
2289         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290         u32 num_pipe_configs;
2291         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292                 rdev->config.cik.max_shader_engines;
2293
2294         switch (rdev->config.cik.mem_row_size_in_kb) {
2295         case 1:
2296                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297                 break;
2298         case 2:
2299         default:
2300                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301                 break;
2302         case 4:
2303                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304                 break;
2305         }
2306
2307         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2308         if (num_pipe_configs > 8)
2309                 num_pipe_configs = 16;
2310
2311         if (num_pipe_configs == 16) {
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313                         switch (reg_offset) {
2314                         case 0:
2315                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319                                 break;
2320                         case 1:
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325                                 break;
2326                         case 2:
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331                                 break;
2332                         case 3:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337                                 break;
2338                         case 4:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                                  TILE_SPLIT(split_equal_to_row_size));
2343                                 break;
2344                         case 5:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                                 break;
2349                         case 6:
2350                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354                                 break;
2355                         case 7:
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                                  TILE_SPLIT(split_equal_to_row_size));
2360                                 break;
2361                         case 8:
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364                                 break;
2365                         case 9:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369                                 break;
2370                         case 10:
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                                 break;
2376                         case 11:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 12:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 13:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392                                 break;
2393                         case 14:
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398                                 break;
2399                         case 16:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                                 break;
2405                         case 17:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                 break;
2411                         case 27:
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415                                 break;
2416                         case 28:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 29:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                                 break;
2428                         case 30:
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                                 break;
2434                         default:
2435                                 gb_tile_moden = 0;
2436                                 break;
2437                         }
2438                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440                 }
2441                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442                         switch (reg_offset) {
2443                         case 0:
2444                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2448                                 break;
2449                         case 1:
2450                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2454                                 break;
2455                         case 2:
2456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2460                                 break;
2461                         case 3:
2462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2466                                 break;
2467                         case 4:
2468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                                 break;
2473                         case 5:
2474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2478                                 break;
2479                         case 6:
2480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2484                                 break;
2485                         case 8:
2486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2490                                 break;
2491                         case 9:
2492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2496                                 break;
2497                         case 10:
2498                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2502                                 break;
2503                         case 11:
2504                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2508                                 break;
2509                         case 12:
2510                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2514                                 break;
2515                         case 13:
2516                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2520                                 break;
2521                         case 14:
2522                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2526                                 break;
2527                         default:
2528                                 gb_tile_moden = 0;
2529                                 break;
2530                         }
2531                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533                 }
2534         } else if (num_pipe_configs == 8) {
2535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536                         switch (reg_offset) {
2537                         case 0:
2538                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542                                 break;
2543                         case 1:
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548                                 break;
2549                         case 2:
2550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554                                 break;
2555                         case 3:
2556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560                                 break;
2561                         case 4:
2562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                                  TILE_SPLIT(split_equal_to_row_size));
2566                                 break;
2567                         case 5:
2568                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                                 break;
2572                         case 6:
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577                                 break;
2578                         case 7:
2579                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                                  TILE_SPLIT(split_equal_to_row_size));
2583                                 break;
2584                         case 8:
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587                                 break;
2588                         case 9:
2589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592                                 break;
2593                         case 10:
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                                 break;
2599                         case 11:
2600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                                 break;
2605                         case 12:
2606                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                                 break;
2611                         case 13:
2612                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615                                 break;
2616                         case 14:
2617                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                                 break;
2622                         case 16:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                                 break;
2628                         case 17:
2629                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                 break;
2634                         case 27:
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638                                 break;
2639                         case 28:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 29:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                 break;
2651                         case 30:
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                 break;
2657                         default:
2658                                 gb_tile_moden = 0;
2659                                 break;
2660                         }
2661                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663                 }
2664                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665                         switch (reg_offset) {
2666                         case 0:
2667                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                                 break;
2672                         case 1:
2673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                                 break;
2678                         case 2:
2679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                                 break;
2684                         case 3:
2685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2689                                 break;
2690                         case 4:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2695                                 break;
2696                         case 5:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2701                                 break;
2702                         case 6:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2707                                 break;
2708                         case 8:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2713                                 break;
2714                         case 9:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 10:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2725                                 break;
2726                         case 11:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2731                                 break;
2732                         case 12:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2737                                 break;
2738                         case 13:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2743                                 break;
2744                         case 14:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2749                                 break;
2750                         default:
2751                                 gb_tile_moden = 0;
2752                                 break;
2753                         }
2754                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756                 }
2757         } else if (num_pipe_configs == 4) {
2758                 if (num_rbs == 4) {
2759                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760                                 switch (reg_offset) {
2761                                 case 0:
2762                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766                                         break;
2767                                 case 1:
2768                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772                                         break;
2773                                 case 2:
2774                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778                                         break;
2779                                 case 3:
2780                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784                                         break;
2785                                 case 4:
2786                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                                          TILE_SPLIT(split_equal_to_row_size));
2790                                         break;
2791                                 case 5:
2792                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                                         break;
2796                                 case 6:
2797                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801                                         break;
2802                                 case 7:
2803                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                                          TILE_SPLIT(split_equal_to_row_size));
2807                                         break;
2808                                 case 8:
2809                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811                                         break;
2812                                 case 9:
2813                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816                                         break;
2817                                 case 10:
2818                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                                         break;
2823                                 case 11:
2824                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828                                         break;
2829                                 case 12:
2830                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                                         break;
2835                                 case 13:
2836                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839                                         break;
2840                                 case 14:
2841                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                                         break;
2846                                 case 16:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                                         break;
2852                                 case 17:
2853                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                                         break;
2858                                 case 27:
2859                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862                                         break;
2863                                 case 28:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 29:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                                         break;
2875                                 case 30:
2876                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                                         break;
2881                                 default:
2882                                         gb_tile_moden = 0;
2883                                         break;
2884                                 }
2885                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887                         }
2888                 } else if (num_rbs < 4) {
2889                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890                                 switch (reg_offset) {
2891                                 case 0:
2892                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896                                         break;
2897                                 case 1:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902                                         break;
2903                                 case 2:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                                         break;
2909                                 case 3:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914                                         break;
2915                                 case 4:
2916                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919                                                          TILE_SPLIT(split_equal_to_row_size));
2920                                         break;
2921                                 case 5:
2922                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                                         break;
2926                                 case 6:
2927                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931                                         break;
2932                                 case 7:
2933                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936                                                          TILE_SPLIT(split_equal_to_row_size));
2937                                         break;
2938                                 case 8:
2939                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941                                         break;
2942                                 case 9:
2943                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946                                         break;
2947                                 case 10:
2948                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                                         break;
2953                                 case 11:
2954                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                                         break;
2959                                 case 12:
2960                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964                                         break;
2965                                 case 13:
2966                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969                                         break;
2970                                 case 14:
2971                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975                                         break;
2976                                 case 16:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                                         break;
2982                                 case 17:
2983                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                                         break;
2988                                 case 27:
2989                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992                                         break;
2993                                 case 28:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 29:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004                                         break;
3005                                 case 30:
3006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                                         break;
3011                                 default:
3012                                         gb_tile_moden = 0;
3013                                         break;
3014                                 }
3015                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017                         }
3018                 }
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020                         switch (reg_offset) {
3021                         case 0:
3022                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3026                                 break;
3027                         case 1:
3028                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3032                                 break;
3033                         case 2:
3034                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3038                                 break;
3039                         case 3:
3040                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3044                                 break;
3045                         case 4:
3046                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3050                                 break;
3051                         case 5:
3052                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3056                                 break;
3057                         case 6:
3058                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3062                                 break;
3063                         case 8:
3064                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3068                                 break;
3069                         case 9:
3070                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3074                                 break;
3075                         case 10:
3076                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3080                                 break;
3081                         case 11:
3082                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3086                                 break;
3087                         case 12:
3088                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3092                                 break;
3093                         case 13:
3094                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3098                                 break;
3099                         case 14:
3100                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3104                                 break;
3105                         default:
3106                                 gb_tile_moden = 0;
3107                                 break;
3108                         }
3109                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111                 }
3112         } else if (num_pipe_configs == 2) {
3113                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114                         switch (reg_offset) {
3115                         case 0:
3116                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120                                 break;
3121                         case 1:
3122                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126                                 break;
3127                         case 2:
3128                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132                                 break;
3133                         case 3:
3134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138                                 break;
3139                         case 4:
3140                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                                  TILE_SPLIT(split_equal_to_row_size));
3144                                 break;
3145                         case 5:
3146                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149                                 break;
3150                         case 6:
3151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155                                 break;
3156                         case 7:
3157                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                                  TILE_SPLIT(split_equal_to_row_size));
3161                                 break;
3162                         case 8:
3163                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2));
3165                                 break;
3166                         case 9:
3167                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169                                                  PIPE_CONFIG(ADDR_SURF_P2));
3170                                 break;
3171                         case 10:
3172                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                                 break;
3177                         case 11:
3178                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                                 break;
3183                         case 12:
3184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188                                 break;
3189                         case 13:
3190                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193                                 break;
3194                         case 14:
3195                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                                 break;
3200                         case 16:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3204                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                                 break;
3206                         case 17:
3207                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                                 break;
3212                         case 27:
3213                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215                                                  PIPE_CONFIG(ADDR_SURF_P2));
3216                                 break;
3217                         case 28:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 29:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                                 break;
3229                         case 30:
3230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234                                 break;
3235                         default:
3236                                 gb_tile_moden = 0;
3237                                 break;
3238                         }
3239                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241                 }
3242                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243                         switch (reg_offset) {
3244                         case 0:
3245                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                                 break;
3250                         case 1:
3251                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3255                                 break;
3256                         case 2:
3257                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3261                                 break;
3262                         case 3:
3263                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3267                                 break;
3268                         case 4:
3269                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3273                                 break;
3274                         case 5:
3275                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3279                                 break;
3280                         case 6:
3281                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3285                                 break;
3286                         case 8:
3287                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3291                                 break;
3292                         case 9:
3293                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3297                                 break;
3298                         case 10:
3299                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3303                                 break;
3304                         case 11:
3305                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                                 break;
3310                         case 12:
3311                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3315                                 break;
3316                         case 13:
3317                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3321                                 break;
3322                         case 14:
3323                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3327                                 break;
3328                         default:
3329                                 gb_tile_moden = 0;
3330                                 break;
3331                         }
3332                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334                 }
3335         } else
3336                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3337 }
3338
3339 /**
3340  * cik_select_se_sh - select which SE, SH to address
3341  *
3342  * @rdev: radeon_device pointer
3343  * @se_num: shader engine to address
3344  * @sh_num: sh block to address
3345  *
3346  * Select which SE, SH combinations to address. Certain
3347  * registers are instanced per SE or SH.  0xffffffff means
3348  * broadcast to all SEs or SHs (CIK).
3349  */
3350 static void cik_select_se_sh(struct radeon_device *rdev,
3351                              u32 se_num, u32 sh_num)
3352 {
3353         u32 data = INSTANCE_BROADCAST_WRITES;
3354
3355         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357         else if (se_num == 0xffffffff)
3358                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359         else if (sh_num == 0xffffffff)
3360                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361         else
3362                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363         WREG32(GRBM_GFX_INDEX, data);
3364 }
3365
3366 /**
3367  * cik_create_bitmask - create a bitmask
3368  *
3369  * @bit_width: length of the mask
3370  *
3371  * create a variable length bit mask (CIK).
3372  * Returns the bitmask.
3373  */
3374 static u32 cik_create_bitmask(u32 bit_width)
3375 {
3376         u32 i, mask = 0;
3377
3378         for (i = 0; i < bit_width; i++) {
3379                 mask <<= 1;
3380                 mask |= 1;
3381         }
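        /* equivalent to (1 << bit_width) - 1 for bit_width < 32 */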
3382         return mask;
3383 }
3384
3385 /**
3386  * cik_get_rb_disabled - computes the mask of disabled RBs
3387  *
3388  * @rdev: radeon_device pointer
3389  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3392  *
3393  * Calculates the bitmask of disabled RBs (CIK).
3394  * Returns the disabled RB bitmask.
3395  */
3396 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397                               u32 max_rb_num_per_se,
3398                               u32 sh_per_se)
3399 {
3400         u32 data, mask;
3401
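        /* combine the hardware-fused (CC) and user/driver-disabled (GC_USER) RB masks */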
3402         data = RREG32(CC_RB_BACKEND_DISABLE);
3403         if (data & 1)
3404                 data &= BACKEND_DISABLE_MASK;
3405         else
3406                 data = 0;
3407         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408
3409         data >>= BACKEND_DISABLE_SHIFT;
3410
3411         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412
3413         return data & mask;
3414 }
3415
3416 /**
3417  * cik_setup_rb - setup the RBs on the asic
3418  *
3419  * @rdev: radeon_device pointer
3420  * @se_num: number of SEs (shader engines) for the asic
3421  * @sh_per_se: number of SH blocks per SE for the asic
3422  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3423  *
3424  * Configures per-SE/SH RB registers (CIK).
3425  */
3426 static void cik_setup_rb(struct radeon_device *rdev,
3427                          u32 se_num, u32 sh_per_se,
3428                          u32 max_rb_num_per_se)
3429 {
3430         int i, j;
3431         u32 data, mask;
3432         u32 disabled_rbs = 0;
3433         u32 enabled_rbs = 0;
3434
3435         mutex_lock(&rdev->grbm_idx_mutex);
3436         for (i = 0; i < se_num; i++) {
3437                 for (j = 0; j < sh_per_se; j++) {
3438                         cik_select_se_sh(rdev, i, j);
3439                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440                         if (rdev->family == CHIP_HAWAII)
3441                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442                         else
3443                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444                 }
3445         }
3446         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3447         mutex_unlock(&rdev->grbm_idx_mutex);
3448
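        /* invert the disabled-RB map gathered above to get the mask of enabled RBs */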
3449         mask = 1;
3450         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451                 if (!(disabled_rbs & mask))
3452                         enabled_rbs |= mask;
3453                 mask <<= 1;
3454         }
3455
3456         rdev->config.cik.backend_enable_mask = enabled_rbs;
3457
3458         mutex_lock(&rdev->grbm_idx_mutex);
3459         for (i = 0; i < se_num; i++) {
3460                 cik_select_se_sh(rdev, i, 0xffffffff);
3461                 data = 0;
3462                 for (j = 0; j < sh_per_se; j++) {
3463                         switch (enabled_rbs & 3) {
3464                         case 0:
3465                                 if (j == 0)
3466                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467                                 else
3468                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469                                 break;
3470                         case 1:
3471                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472                                 break;
3473                         case 2:
3474                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475                                 break;
3476                         case 3:
3477                         default:
3478                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479                                 break;
3480                         }
3481                         enabled_rbs >>= 2;
3482                 }
3483                 WREG32(PA_SC_RASTER_CONFIG, data);
3484         }
3485         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3486         mutex_unlock(&rdev->grbm_idx_mutex);
3487 }
3488
3489 /**
3490  * cik_gpu_init - setup the 3D engine
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Configures the 3D engine and tiling configuration
3495  * registers so that the 3D engine is usable.
3496  */
3497 static void cik_gpu_init(struct radeon_device *rdev)
3498 {
3499         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500         u32 mc_shared_chmap, mc_arb_ramcfg;
3501         u32 hdp_host_path_cntl;
3502         u32 tmp;
3503         int i, j;
3504
3505         switch (rdev->family) {
3506         case CHIP_BONAIRE:
3507                 rdev->config.cik.max_shader_engines = 2;
3508                 rdev->config.cik.max_tile_pipes = 4;
3509                 rdev->config.cik.max_cu_per_sh = 7;
3510                 rdev->config.cik.max_sh_per_se = 1;
3511                 rdev->config.cik.max_backends_per_se = 2;
3512                 rdev->config.cik.max_texture_channel_caches = 4;
3513                 rdev->config.cik.max_gprs = 256;
3514                 rdev->config.cik.max_gs_threads = 32;
3515                 rdev->config.cik.max_hw_contexts = 8;
3516
3517                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522                 break;
3523         case CHIP_HAWAII:
3524                 rdev->config.cik.max_shader_engines = 4;
3525                 rdev->config.cik.max_tile_pipes = 16;
3526                 rdev->config.cik.max_cu_per_sh = 11;
3527                 rdev->config.cik.max_sh_per_se = 1;
3528                 rdev->config.cik.max_backends_per_se = 4;
3529                 rdev->config.cik.max_texture_channel_caches = 16;
3530                 rdev->config.cik.max_gprs = 256;
3531                 rdev->config.cik.max_gs_threads = 32;
3532                 rdev->config.cik.max_hw_contexts = 8;
3533
3534                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539                 break;
3540         case CHIP_KAVERI:
3541                 rdev->config.cik.max_shader_engines = 1;
3542                 rdev->config.cik.max_tile_pipes = 4;
3543                 if ((rdev->pdev->device == 0x1304) ||
3544                     (rdev->pdev->device == 0x1305) ||
3545                     (rdev->pdev->device == 0x130C) ||
3546                     (rdev->pdev->device == 0x130F) ||
3547                     (rdev->pdev->device == 0x1310) ||
3548                     (rdev->pdev->device == 0x1311) ||
3549                     (rdev->pdev->device == 0x131C)) {
3550                         rdev->config.cik.max_cu_per_sh = 8;
3551                         rdev->config.cik.max_backends_per_se = 2;
3552                 } else if ((rdev->pdev->device == 0x1309) ||
3553                            (rdev->pdev->device == 0x130A) ||
3554                            (rdev->pdev->device == 0x130D) ||
3555                            (rdev->pdev->device == 0x1313) ||
3556                            (rdev->pdev->device == 0x131D)) {
3557                         rdev->config.cik.max_cu_per_sh = 6;
3558                         rdev->config.cik.max_backends_per_se = 2;
3559                 } else if ((rdev->pdev->device == 0x1306) ||
3560                            (rdev->pdev->device == 0x1307) ||
3561                            (rdev->pdev->device == 0x130B) ||
3562                            (rdev->pdev->device == 0x130E) ||
3563                            (rdev->pdev->device == 0x1315) ||
3564                            (rdev->pdev->device == 0x1318) ||
3565                            (rdev->pdev->device == 0x131B)) {
3566                         rdev->config.cik.max_cu_per_sh = 4;
3567                         rdev->config.cik.max_backends_per_se = 1;
3568                 } else {
3569                         rdev->config.cik.max_cu_per_sh = 3;
3570                         rdev->config.cik.max_backends_per_se = 1;
3571                 }
3572                 rdev->config.cik.max_sh_per_se = 1;
3573                 rdev->config.cik.max_texture_channel_caches = 4;
3574                 rdev->config.cik.max_gprs = 256;
3575                 rdev->config.cik.max_gs_threads = 16;
3576                 rdev->config.cik.max_hw_contexts = 8;
3577
3578                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583                 break;
3584         case CHIP_KABINI:
3585         case CHIP_MULLINS:
3586         default:
3587                 rdev->config.cik.max_shader_engines = 1;
3588                 rdev->config.cik.max_tile_pipes = 2;
3589                 rdev->config.cik.max_cu_per_sh = 2;
3590                 rdev->config.cik.max_sh_per_se = 1;
3591                 rdev->config.cik.max_backends_per_se = 1;
3592                 rdev->config.cik.max_texture_channel_caches = 2;
3593                 rdev->config.cik.max_gprs = 256;
3594                 rdev->config.cik.max_gs_threads = 16;
3595                 rdev->config.cik.max_hw_contexts = 8;
3596
3597                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602                 break;
3603         }
3604
3605         /* Initialize HDP */
3606         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607                 WREG32((0x2c14 + j), 0x00000000);
3608                 WREG32((0x2c18 + j), 0x00000000);
3609                 WREG32((0x2c1c + j), 0x00000000);
3610                 WREG32((0x2c20 + j), 0x00000000);
3611                 WREG32((0x2c24 + j), 0x00000000);
3612         }
3613
3614         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615
3616         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617
3618         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620
3621         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622         rdev->config.cik.mem_max_burst_length_bytes = 256;
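        /* mem_row_size_in_kb = 4 * 2^(8 + NOOFCOLS) bytes / 1024, capped at 4KB below */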
3623         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625         if (rdev->config.cik.mem_row_size_in_kb > 4)
3626                 rdev->config.cik.mem_row_size_in_kb = 4;
3627         /* XXX use MC settings? */
3628         rdev->config.cik.shader_engine_tile_size = 32;
3629         rdev->config.cik.num_gpus = 1;
3630         rdev->config.cik.multi_gpu_tile_size = 64;
3631
3632         /* fix up row size */
3633         gb_addr_config &= ~ROW_SIZE_MASK;
3634         switch (rdev->config.cik.mem_row_size_in_kb) {
3635         case 1:
3636         default:
3637                 gb_addr_config |= ROW_SIZE(0);
3638                 break;
3639         case 2:
3640                 gb_addr_config |= ROW_SIZE(1);
3641                 break;
3642         case 4:
3643                 gb_addr_config |= ROW_SIZE(2);
3644                 break;
3645         }
3646
3647         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3648          * not have bank info, so create a custom tiling dword.
3649          * bits 3:0   num_pipes
3650          * bits 7:4   num_banks
3651          * bits 11:8  group_size
3652          * bits 15:12 row_size
3653          */
3654         rdev->config.cik.tile_config = 0;
3655         switch (rdev->config.cik.num_tile_pipes) {
3656         case 1:
3657                 rdev->config.cik.tile_config |= (0 << 0);
3658                 break;
3659         case 2:
3660                 rdev->config.cik.tile_config |= (1 << 0);
3661                 break;
3662         case 4:
3663                 rdev->config.cik.tile_config |= (2 << 0);
3664                 break;
3665         case 8:
3666         default:
3667                 /* XXX what about 12? */
3668                 rdev->config.cik.tile_config |= (3 << 0);
3669                 break;
3670         }
3671         rdev->config.cik.tile_config |=
3672                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673         rdev->config.cik.tile_config |=
3674                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675         rdev->config.cik.tile_config |=
3676                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3677
3678         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686
3687         cik_tiling_mode_table_init(rdev);
3688
3689         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690                      rdev->config.cik.max_sh_per_se,
3691                      rdev->config.cik.max_backends_per_se);
3692
3693         rdev->config.cik.active_cus = 0;
3694         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696                         rdev->config.cik.active_cus +=
3697                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698                 }
3699         }
3700
3701         /* set HW defaults for 3D engine */
3702         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703
3704         mutex_lock(&rdev->grbm_idx_mutex);
3705         /*
3706          * making sure that the following register writes will be broadcasted
3707          * to all the shaders
3708          */
3709         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3710         WREG32(SX_DEBUG_1, 0x20);
3711
3712         WREG32(TA_CNTL_AUX, 0x00010000);
3713
3714         tmp = RREG32(SPI_CONFIG_CNTL);
3715         tmp |= 0x03000000;
3716         WREG32(SPI_CONFIG_CNTL, tmp);
3717
3718         WREG32(SQ_CONFIG, 1);
3719
3720         WREG32(DB_DEBUG, 0);
3721
3722         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723         tmp |= 0x00000400;
3724         WREG32(DB_DEBUG2, tmp);
3725
3726         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727         tmp |= 0x00020200;
3728         WREG32(DB_DEBUG3, tmp);
3729
3730         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731         tmp |= 0x00018208;
3732         WREG32(CB_HW_CONTROL, tmp);
3733
3734         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735
3736         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740
3741         WREG32(VGT_NUM_INSTANCES, 1);
3742
3743         WREG32(CP_PERFMON_CNTL, 0);
3744
3745         WREG32(SQ_CONFIG, 0);
3746
3747         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748                                           FORCE_EOV_MAX_REZ_CNT(255)));
3749
3750         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752
3753         WREG32(VGT_GS_VERTEX_REUSE, 16);
3754         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755
3756         tmp = RREG32(HDP_MISC_CNTL);
3757         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758         WREG32(HDP_MISC_CNTL, tmp);
3759
3760         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762
3763         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3765         mutex_unlock(&rdev->grbm_idx_mutex);
3766
3767         udelay(50);
3768 }
3769
3770 /*
3771  * GPU scratch registers helpers function.
3772  */
3773 /**
3774  * cik_scratch_init - setup driver info for CP scratch regs
3775  *
3776  * @rdev: radeon_device pointer
3777  *
3778  * Set up the number and offset of the CP scratch registers.
3779  * NOTE: use of CP scratch registers is a legacy interface and
3780  * is not used by default on newer asics (r6xx+).  On newer asics,
3781  * memory buffers are used for fences rather than scratch regs.
3782  */
3783 static void cik_scratch_init(struct radeon_device *rdev)
3784 {
3785         int i;
3786
3787         rdev->scratch.num_reg = 7;
3788         rdev->scratch.reg_base = SCRATCH_REG0;
3789         for (i = 0; i < rdev->scratch.num_reg; i++) {
3790                 rdev->scratch.free[i] = true;
3791                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792         }
3793 }
3794
3795 /**
3796  * cik_ring_test - basic gfx ring test
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ring: radeon_ring structure holding ring information
3800  *
3801  * Allocate a scratch register and write to it using the gfx ring (CIK).
3802  * Provides a basic gfx ring test to verify that the ring is working.
3803  * Used by cik_cp_gfx_resume().
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808         uint32_t scratch;
3809         uint32_t tmp = 0;
3810         unsigned i;
3811         int r;
3812
3813         r = radeon_scratch_get(rdev, &scratch);
3814         if (r) {
3815                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816                 return r;
3817         }
3818         WREG32(scratch, 0xCAFEDEAD);
3819         r = radeon_ring_lock(rdev, ring, 3);
3820         if (r) {
3821                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822                 radeon_scratch_free(rdev, scratch);
3823                 return r;
3824         }
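        /* ask the CP to overwrite the 0xCAFEDEAD poison with 0xDEADBEEF via a register write packet */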
3825         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827         radeon_ring_write(ring, 0xDEADBEEF);
3828         radeon_ring_unlock_commit(rdev, ring, false);
3829
3830         for (i = 0; i < rdev->usec_timeout; i++) {
3831                 tmp = RREG32(scratch);
3832                 if (tmp == 0xDEADBEEF)
3833                         break;
3834                 DRM_UDELAY(1);
3835         }
3836         if (i < rdev->usec_timeout) {
3837                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838         } else {
3839                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840                           ring->idx, scratch, tmp);
3841                 r = -EINVAL;
3842         }
3843         radeon_scratch_free(rdev, scratch);
3844         return r;
3845 }
3846
3847 /**
3848  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849  *
3850  * @rdev: radeon_device pointer
3851  * @ridx: radeon ring index
3852  *
3853  * Emits an hdp flush on the cp.
3854  */
3855 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856                                        int ridx)
3857 {
3858         struct radeon_ring *ring = &rdev->ring[ridx];
3859         u32 ref_and_mask;
3860
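        /* pick the GPU_HDP_FLUSH_REQ/DONE bit that corresponds to this ring's CP engine and pipe */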
3861         switch (ring->idx) {
3862         case CAYMAN_RING_TYPE_CP1_INDEX:
3863         case CAYMAN_RING_TYPE_CP2_INDEX:
3864         default:
3865                 switch (ring->me) {
3866                 case 0:
3867                         ref_and_mask = CP2 << ring->pipe;
3868                         break;
3869                 case 1:
3870                         ref_and_mask = CP6 << ring->pipe;
3871                         break;
3872                 default:
3873                         return;
3874                 }
3875                 break;
3876         case RADEON_RING_TYPE_GFX_INDEX:
3877                 ref_and_mask = CP0;
3878                 break;
3879         }
3880
3881         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887         radeon_ring_write(ring, ref_and_mask);
3888         radeon_ring_write(ring, ref_and_mask);
3889         radeon_ring_write(ring, 0x20); /* poll interval */
3890 }
3891
3892 /**
3893  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894  *
3895  * @rdev: radeon_device pointer
3896  * @fence: radeon fence object
3897  *
3898  * Emits a fence sequence number on the gfx ring and flushes
3899  * GPU caches.
3900  */
3901 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902                              struct radeon_fence *fence)
3903 {
3904         struct radeon_ring *ring = &rdev->ring[fence->ring];
3905         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906
3907         /* EVENT_WRITE_EOP - flush caches, send int */
3908         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910                                  EOP_TC_ACTION_EN |
3911                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912                                  EVENT_INDEX(5)));
3913         radeon_ring_write(ring, addr & 0xfffffffc);
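        /* DATA_SEL(1) selects a 32-bit fence write, INT_SEL(2) also raises an interrupt once the write lands */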
3914         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915         radeon_ring_write(ring, fence->seq);
3916         radeon_ring_write(ring, 0);
3917 }
3918
3919 /**
3920  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921  *
3922  * @rdev: radeon_device pointer
3923  * @fence: radeon fence object
3924  *
3925  * Emits a fence sequence number on the compute ring and flushes
3926  * GPU caches.
3927  */
3928 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929                                  struct radeon_fence *fence)
3930 {
3931         struct radeon_ring *ring = &rdev->ring[fence->ring];
3932         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933
3934         /* RELEASE_MEM - flush caches, send int */
3935         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937                                  EOP_TC_ACTION_EN |
3938                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939                                  EVENT_INDEX(5)));
3940         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941         radeon_ring_write(ring, addr & 0xfffffffc);
3942         radeon_ring_write(ring, upper_32_bits(addr));
3943         radeon_ring_write(ring, fence->seq);
3944         radeon_ring_write(ring, 0);
3945 }
3946
3947 /**
3948  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949  *
3950  * @rdev: radeon_device pointer
3951  * @ring: radeon ring buffer object
3952  * @semaphore: radeon semaphore object
3953  * @emit_wait: Is this a semaphore wait?
3954  *
3955  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956  * from running ahead of semaphore waits.
3957  */
3958 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959                              struct radeon_ring *ring,
3960                              struct radeon_semaphore *semaphore,
3961                              bool emit_wait)
3962 {
3963         uint64_t addr = semaphore->gpu_addr;
3964         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965
3966         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967         radeon_ring_write(ring, lower_32_bits(addr));
3968         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969
3970         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971                 /* Prevent the PFP from running ahead of the semaphore wait */
3972                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973                 radeon_ring_write(ring, 0x0);
3974         }
3975
3976         return true;
3977 }
3978
3979 /**
3980  * cik_copy_cpdma - copy pages using the CP DMA engine
3981  *
3982  * @rdev: radeon_device pointer
3983  * @src_offset: src GPU address
3984  * @dst_offset: dst GPU address
3985  * @num_gpu_pages: number of GPU pages to xfer
3986  * @resv: reservation object to sync to
3987  *
3988  * Copy GPU pages using the CP DMA engine (CIK+).
3989  * Used by the radeon ttm implementation to move pages if
3990  * registered as the asic copy callback.
3991  */
3992 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3993                                     uint64_t src_offset, uint64_t dst_offset,
3994                                     unsigned num_gpu_pages,
3995                                     struct reservation_object *resv)
3996 {
3997         struct radeon_fence *fence;
3998         struct radeon_sync sync;
3999         int ring_index = rdev->asic->copy.blit_ring_index;
4000         struct radeon_ring *ring = &rdev->ring[ring_index];
4001         u32 size_in_bytes, cur_size_in_bytes, control;
4002         int i, num_loops;
4003         int r = 0;
4004
4005         radeon_sync_create(&sync);
4006
4007         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
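        /* a single CP DMA_DATA packet can copy at most 0x1fffff bytes, so split the transfer accordingly */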
4008         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4009         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4010         if (r) {
4011                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4012                 radeon_sync_free(rdev, &sync, NULL);
4013                 return ERR_PTR(r);
4014         }
4015
4016         radeon_sync_resv(rdev, &sync, resv, false);
4017         radeon_sync_rings(rdev, &sync, ring->idx);
4018
4019         for (i = 0; i < num_loops; i++) {
4020                 cur_size_in_bytes = size_in_bytes;
4021                 if (cur_size_in_bytes > 0x1fffff)
4022                         cur_size_in_bytes = 0x1fffff;
4023                 size_in_bytes -= cur_size_in_bytes;
4024                 control = 0;
4025                 if (size_in_bytes == 0)
4026                         control |= PACKET3_DMA_DATA_CP_SYNC;
4027                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4028                 radeon_ring_write(ring, control);
4029                 radeon_ring_write(ring, lower_32_bits(src_offset));
4030                 radeon_ring_write(ring, upper_32_bits(src_offset));
4031                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4032                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4033                 radeon_ring_write(ring, cur_size_in_bytes);
4034                 src_offset += cur_size_in_bytes;
4035                 dst_offset += cur_size_in_bytes;
4036         }
4037
4038         r = radeon_fence_emit(rdev, &fence, ring->idx);
4039         if (r) {
4040                 radeon_ring_unlock_undo(rdev, ring);
4041                 radeon_sync_free(rdev, &sync, NULL);
4042                 return ERR_PTR(r);
4043         }
4044
4045         radeon_ring_unlock_commit(rdev, ring, false);
4046         radeon_sync_free(rdev, &sync, fence);
4047
4048         return fence;
4049 }
4050
4051 /*
4052  * IB stuff
4053  */
4054 /**
4055  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4056  *
4057  * @rdev: radeon_device pointer
4058  * @ib: radeon indirect buffer object
4059  *
4060  * Emits a DE (drawing engine) or CE (constant engine) IB
4061  * on the gfx ring.  IBs are usually generated by userspace
4062  * acceleration drivers and submitted to the kernel for
4063  * scheduling on the ring.  This function schedules the IB
4064  * on the gfx ring for execution by the GPU.
4065  */
4066 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4067 {
4068         struct radeon_ring *ring = &rdev->ring[ib->ring];
4069         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4070         u32 header, control = INDIRECT_BUFFER_VALID;
4071
4072         if (ib->is_const_ib) {
4073                 /* set switch buffer packet before const IB */
4074                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4075                 radeon_ring_write(ring, 0);
4076
4077                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4078         } else {
4079                 u32 next_rptr;
4080                 if (ring->rptr_save_reg) {
4081                         next_rptr = ring->wptr + 3 + 4;
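                        /* 3 dwords for the SET_UCONFIG_REG write below plus 4 for the IB packet emitted at the end */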
4082                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4083                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4084                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4085                         radeon_ring_write(ring, next_rptr);
4086                 } else if (rdev->wb.enabled) {
4087                         next_rptr = ring->wptr + 5 + 4;
4088                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4089                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4090                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4091                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4092                         radeon_ring_write(ring, next_rptr);
4093                 }
4094
4095                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4096         }
4097
4098         control |= ib->length_dw | (vm_id << 24);
4099
4100         radeon_ring_write(ring, header);
4101         radeon_ring_write(ring,
4102 #ifdef __BIG_ENDIAN
4103                           (2 << 0) |
4104 #endif
4105                           (ib->gpu_addr & 0xFFFFFFFC));
4106         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4107         radeon_ring_write(ring, control);
4108 }
4109
4110 /**
4111  * cik_ib_test - basic gfx ring IB test
4112  *
4113  * @rdev: radeon_device pointer
4114  * @ring: radeon_ring structure holding ring information
4115  *
4116  * Allocate an IB and execute it on the gfx ring (CIK).
4117  * Provides a basic gfx ring test to verify that IBs are working.
4118  * Returns 0 on success, error on failure.
4119  */
4120 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4121 {
4122         struct radeon_ib ib;
4123         uint32_t scratch;
4124         uint32_t tmp = 0;
4125         unsigned i;
4126         int r;
4127
4128         r = radeon_scratch_get(rdev, &scratch);
4129         if (r) {
4130                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4131                 return r;
4132         }
4133         WREG32(scratch, 0xCAFEDEAD);
4134         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4135         if (r) {
4136                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4137                 radeon_scratch_free(rdev, scratch);
4138                 return r;
4139         }
4140         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4141         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4142         ib.ptr[2] = 0xDEADBEEF;
4143         ib.length_dw = 3;
4144         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4145         if (r) {
4146                 radeon_scratch_free(rdev, scratch);
4147                 radeon_ib_free(rdev, &ib);
4148                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4149                 return r;
4150         }
4151         r = radeon_fence_wait(ib.fence, false);
4152         if (r) {
4153                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4154                 radeon_scratch_free(rdev, scratch);
4155                 radeon_ib_free(rdev, &ib);
4156                 return r;
4157         }
4158         for (i = 0; i < rdev->usec_timeout; i++) {
4159                 tmp = RREG32(scratch);
4160                 if (tmp == 0xDEADBEEF)
4161                         break;
4162                 DRM_UDELAY(1);
4163         }
4164         if (i < rdev->usec_timeout) {
4165                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4166         } else {
4167                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4168                           scratch, tmp);
4169                 r = -EINVAL;
4170         }
4171         radeon_scratch_free(rdev, scratch);
4172         radeon_ib_free(rdev, &ib);
4173         return r;
4174 }
4175
4176 /*
4177  * CP.
4178  * On CIK, gfx and compute now have independent command processors.
4179  *
4180  * GFX
4181  * Gfx consists of a single ring and can process both gfx jobs and
4182  * compute jobs.  The gfx CP consists of three microengines (ME):
4183  * PFP - Pre-Fetch Parser
4184  * ME - Micro Engine
4185  * CE - Constant Engine
4186  * The PFP and ME make up what is considered the Drawing Engine (DE).
4187  * The CE is an asynchronous engine used for updating buffer descriptors
4188  * used by the DE so that they can be loaded into cache in parallel
4189  * while the DE is processing state update packets.
4190  *
4191  * Compute
4192  * The compute CP consists of two microengines (ME):
4193  * MEC1 - Compute MicroEngine 1
4194  * MEC2 - Compute MicroEngine 2
4195  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4196  * The queues are exposed to userspace and are programmed directly
4197  * by the compute runtime.
4198  */
4199 /**
4200  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4201  *
4202  * @rdev: radeon_device pointer
4203  * @enable: enable or disable the MEs
4204  *
4205  * Halts or unhalts the gfx MEs.
4206  */
4207 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4208 {
4209         if (enable)
4210                 WREG32(CP_ME_CNTL, 0);
4211         else {
4212                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4213                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4214                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4215                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4216         }
4217         udelay(50);
4218 }
4219
4220 /**
4221  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4222  *
4223  * @rdev: radeon_device pointer
4224  *
4225  * Loads the gfx PFP, ME, and CE ucode.
4226  * Returns 0 for success, -EINVAL if the ucode is not available.
4227  */
4228 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4229 {
4230         int i;
4231
4232         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4233                 return -EINVAL;
4234
4235         cik_cp_gfx_enable(rdev, false);
4236
4237         if (rdev->new_fw) {
4238                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4239                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4240                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4241                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4242                 const struct gfx_firmware_header_v1_0 *me_hdr =
4243                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4244                 const __le32 *fw_data;
4245                 u32 fw_size;
4246
4247                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4248                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4249                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4250
4251                 /* PFP */
4252                 fw_data = (const __le32 *)
4253                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4254                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4255                 WREG32(CP_PFP_UCODE_ADDR, 0);
4256                 for (i = 0; i < fw_size; i++)
4257                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4258                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4259
4260                 /* CE */
4261                 fw_data = (const __le32 *)
4262                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4263                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4264                 WREG32(CP_CE_UCODE_ADDR, 0);
4265                 for (i = 0; i < fw_size; i++)
4266                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4267                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4268
4269                 /* ME */
4270                 fw_data = (const __le32 *)
4271                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4272                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4273                 WREG32(CP_ME_RAM_WADDR, 0);
4274                 for (i = 0; i < fw_size; i++)
4275                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4276                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4277                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4278         } else {
4279                 const __be32 *fw_data;
4280
4281                 /* PFP */
4282                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4283                 WREG32(CP_PFP_UCODE_ADDR, 0);
4284                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4285                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4286                 WREG32(CP_PFP_UCODE_ADDR, 0);
4287
4288                 /* CE */
4289                 fw_data = (const __be32 *)rdev->ce_fw->data;
4290                 WREG32(CP_CE_UCODE_ADDR, 0);
4291                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4292                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4293                 WREG32(CP_CE_UCODE_ADDR, 0);
4294
4295                 /* ME */
4296                 fw_data = (const __be32 *)rdev->me_fw->data;
4297                 WREG32(CP_ME_RAM_WADDR, 0);
4298                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4299                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4300                 WREG32(CP_ME_RAM_WADDR, 0);
4301         }
4302
4303         return 0;
4304 }
4305
4306 /**
4307  * cik_cp_gfx_start - start the gfx ring
4308  *
4309  * @rdev: radeon_device pointer
4310  *
4311  * Enables the ring and loads the clear state context and other
4312  * packets required to init the ring.
4313  * Returns 0 for success, error for failure.
4314  */
4315 static int cik_cp_gfx_start(struct radeon_device *rdev)
4316 {
4317         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4318         int r, i;
4319
4320         /* init the CP */
4321         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4322         WREG32(CP_ENDIAN_SWAP, 0);
4323         WREG32(CP_DEVICE_ID, 1);
4324
4325         cik_cp_gfx_enable(rdev, true);
4326
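        /* reserve 17 dwords for the fixed setup packets emitted below plus the clear state buffer */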
4327         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4328         if (r) {
4329                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4330                 return r;
4331         }
4332
4333         /* init the CE partitions.  CE only used for gfx on CIK */
4334         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336         radeon_ring_write(ring, 0x8000);
4337         radeon_ring_write(ring, 0x8000);
4338
4339         /* setup clear context state */
4340         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4341         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4342
4343         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4344         radeon_ring_write(ring, 0x80000000);
4345         radeon_ring_write(ring, 0x80000000);
4346
4347         for (i = 0; i < cik_default_size; i++)
4348                 radeon_ring_write(ring, cik_default_state[i]);
4349
4350         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4351         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4352
4353         /* set clear context state */
4354         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4355         radeon_ring_write(ring, 0);
4356
4357         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4358         radeon_ring_write(ring, 0x00000316);
4359         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4360         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4361
4362         radeon_ring_unlock_commit(rdev, ring, false);
4363
4364         return 0;
4365 }
4366
4367 /**
4368  * cik_cp_gfx_fini - stop the gfx ring
4369  *
4370  * @rdev: radeon_device pointer
4371  *
4372  * Stop the gfx ring and tear down the driver ring
4373  * info.
4374  */
4375 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4376 {
4377         cik_cp_gfx_enable(rdev, false);
4378         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4379 }
4380
4381 /**
4382  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Program the location and size of the gfx ring buffer
4387  * and test it to make sure it's working.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4391 {
4392         struct radeon_ring *ring;
4393         u32 tmp;
4394         u32 rb_bufsz;
4395         u64 rb_addr;
4396         int r;
4397
4398         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4399         if (rdev->family != CHIP_HAWAII)
4400                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4401
4402         /* Set the write pointer delay */
4403         WREG32(CP_RB_WPTR_DELAY, 0);
4404
4405         /* set the RB to use vmid 0 */
4406         WREG32(CP_RB_VMID, 0);
4407
4408         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4409
4410         /* ring 0 - compute and gfx */
4411         /* Set ring buffer size */
4412         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4413         rb_bufsz = order_base_2(ring->ring_size / 8);
4414         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4415 #ifdef __BIG_ENDIAN
4416         tmp |= BUF_SWAP_32BIT;
4417 #endif
4418         WREG32(CP_RB0_CNTL, tmp);
4419
4420         /* Initialize the ring buffer's read and write pointers */
4421         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4422         ring->wptr = 0;
4423         WREG32(CP_RB0_WPTR, ring->wptr);
4424
4425         /* set the wb address whether it's enabled or not */
4426         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4427         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4428
4429         /* scratch register shadowing is no longer supported */
4430         WREG32(SCRATCH_UMSK, 0);
4431
4432         if (!rdev->wb.enabled)
4433                 tmp |= RB_NO_UPDATE;
4434
4435         mdelay(1);
4436         WREG32(CP_RB0_CNTL, tmp);
4437
4438         rb_addr = ring->gpu_addr >> 8;
4439         WREG32(CP_RB0_BASE, rb_addr);
4440         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4441
4442         /* start the ring */
4443         cik_cp_gfx_start(rdev);
4444         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4445         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4446         if (r) {
4447                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4448                 return r;
4449         }
4450
4451         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4452                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4453
4454         return 0;
4455 }
4456
4457 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4458                      struct radeon_ring *ring)
4459 {
4460         u32 rptr;
4461
4462         if (rdev->wb.enabled)
4463                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4464         else
4465                 rptr = RREG32(CP_RB0_RPTR);
4466
4467         return rptr;
4468 }
4469
4470 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4471                      struct radeon_ring *ring)
4472 {
4473         u32 wptr;
4474
4475         wptr = RREG32(CP_RB0_WPTR);
4476
4477         return wptr;
4478 }
4479
4480 void cik_gfx_set_wptr(struct radeon_device *rdev,
4481                       struct radeon_ring *ring)
4482 {
4483         WREG32(CP_RB0_WPTR, ring->wptr);
4484         (void)RREG32(CP_RB0_WPTR);
4485 }
4486
4487 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4488                          struct radeon_ring *ring)
4489 {
4490         u32 rptr;
4491
4492         if (rdev->wb.enabled) {
4493                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4494         } else {
4495                 mutex_lock(&rdev->srbm_mutex);
4496                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4497                 rptr = RREG32(CP_HQD_PQ_RPTR);
4498                 cik_srbm_select(rdev, 0, 0, 0, 0);
4499                 mutex_unlock(&rdev->srbm_mutex);
4500         }
4501
4502         return rptr;
4503 }
4504
4505 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4506                          struct radeon_ring *ring)
4507 {
4508         u32 wptr;
4509
4510         if (rdev->wb.enabled) {
4511                 /* XXX check if swapping is necessary on BE */
4512                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4513         } else {
4514                 mutex_lock(&rdev->srbm_mutex);
4515                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4516                 wptr = RREG32(CP_HQD_PQ_WPTR);
4517                 cik_srbm_select(rdev, 0, 0, 0, 0);
4518                 mutex_unlock(&rdev->srbm_mutex);
4519         }
4520
4521         return wptr;
4522 }
4523
4524 void cik_compute_set_wptr(struct radeon_device *rdev,
4525                           struct radeon_ring *ring)
4526 {
4527         /* XXX check if swapping is necessary on BE */
4528         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4529         WDOORBELL32(ring->doorbell_index, ring->wptr);
4530 }
4531
4532 /**
4533  * cik_cp_compute_enable - enable/disable the compute CP MEs
4534  *
4535  * @rdev: radeon_device pointer
4536  * @enable: enable or disable the MEs
4537  *
4538  * Halts or unhalts the compute MEs.
4539  */
4540 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4541 {
4542         if (enable)
4543                 WREG32(CP_MEC_CNTL, 0);
4544         else {
4545                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4546                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4547                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4548         }
4549         udelay(50);
4550 }
4551
4552 /**
4553  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4554  *
4555  * @rdev: radeon_device pointer
4556  *
4557  * Loads the compute MEC1&2 ucode.
4558  * Returns 0 for success, -EINVAL if the ucode is not available.
4559  */
4560 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4561 {
4562         int i;
4563
4564         if (!rdev->mec_fw)
4565                 return -EINVAL;
4566
4567         cik_cp_compute_enable(rdev, false);
4568
4569         if (rdev->new_fw) {
4570                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4571                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4572                 const __le32 *fw_data;
4573                 u32 fw_size;
4574
4575                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4576
4577                 /* MEC1 */
4578                 fw_data = (const __le32 *)
4579                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4580                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4581                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4582                 for (i = 0; i < fw_size; i++)
4583                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4584                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4585
4586                 /* MEC2 */
4587                 if (rdev->family == CHIP_KAVERI) {
4588                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4589                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4590
4591                         fw_data = (const __le32 *)
4592                                 (rdev->mec2_fw->data +
4593                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4594                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4595                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4596                         for (i = 0; i < fw_size; i++)
4597                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4598                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4599                 }
4600         } else {
4601                 const __be32 *fw_data;
4602
4603                 /* MEC1 */
4604                 fw_data = (const __be32 *)rdev->mec_fw->data;
4605                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4606                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4607                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4608                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4609
4610                 if (rdev->family == CHIP_KAVERI) {
4611                         /* MEC2 */
4612                         fw_data = (const __be32 *)rdev->mec_fw->data;
4613                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4614                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4615                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4616                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4617                 }
4618         }
4619
4620         return 0;
4621 }
4622
4623 /**
4624  * cik_cp_compute_start - start the compute queues
4625  *
4626  * @rdev: radeon_device pointer
4627  *
4628  * Enable the compute queues.
4629  * Returns 0 for success, error for failure.
4630  */
4631 static int cik_cp_compute_start(struct radeon_device *rdev)
4632 {
4633         cik_cp_compute_enable(rdev, true);
4634
4635         return 0;
4636 }
4637
4638 /**
4639  * cik_cp_compute_fini - stop the compute queues
4640  *
4641  * @rdev: radeon_device pointer
4642  *
4643  * Stop the compute queues and tear down the driver queue
4644  * info.
4645  */
4646 static void cik_cp_compute_fini(struct radeon_device *rdev)
4647 {
4648         int i, idx, r;
4649
4650         cik_cp_compute_enable(rdev, false);
4651
4652         for (i = 0; i < 2; i++) {
4653                 if (i == 0)
4654                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4655                 else
4656                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4657
4658                 if (rdev->ring[idx].mqd_obj) {
4659                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4660                         if (unlikely(r != 0))
4661                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4662
4663                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4664                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4665
4666                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4667                         rdev->ring[idx].mqd_obj = NULL;
4668                 }
4669         }
4670 }
4671
4672 static void cik_mec_fini(struct radeon_device *rdev)
4673 {
4674         int r;
4675
4676         if (rdev->mec.hpd_eop_obj) {
4677                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4678                 if (unlikely(r != 0))
4679                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4680                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4681                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4682
4683                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4684                 rdev->mec.hpd_eop_obj = NULL;
4685         }
4686 }
4687
4688 #define MEC_HPD_SIZE 2048
4689
4690 static int cik_mec_init(struct radeon_device *rdev)
4691 {
4692         int r;
4693         u32 *hpd;
4694
4695         /*
4696          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4697          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4698          * Nonetheless, we assign only 1 pipe because all other pipes will
4699          * be handled by KFD
4700          */
4701         rdev->mec.num_mec = 1;
4702         rdev->mec.num_pipe = 1;
4703         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
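        /*
         * With num_mec = 1 and num_pipe = 1 this works out to 8 queues
         * owned by radeon; the remaining pipes/queues (up to the 64 on KV
         * or 32 on CI/KB noted above) are left for amdkfd to manage.  The
         * HPD EOP buffer allocated below is therefore
         * num_mec * num_pipe * MEC_HPD_SIZE * 2 = 4096 bytes, i.e. one page.
         */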
4704
4705         if (rdev->mec.hpd_eop_obj == NULL) {
4706                 r = radeon_bo_create(rdev,
4707                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4708                                      PAGE_SIZE, true,
4709                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4710                                      &rdev->mec.hpd_eop_obj);
4711                 if (r) {
4712                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4713                         return r;
4714                 }
4715         }
4716
4717         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4718         if (unlikely(r != 0)) {
4719                 cik_mec_fini(rdev);
4720                 return r;
4721         }
4722         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4723                           &rdev->mec.hpd_eop_gpu_addr);
4724         if (r) {
4725                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4726                 cik_mec_fini(rdev);
4727                 return r;
4728         }
4729         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4730         if (r) {
4731                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4732                 cik_mec_fini(rdev);
4733                 return r;
4734         }
4735
4736         /* clear memory.  Not sure if this is required or not */
4737         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4738
4739         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4740         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4741
4742         return 0;
4743 }
4744
4745 struct hqd_registers
4746 {
4747         u32 cp_mqd_base_addr;
4748         u32 cp_mqd_base_addr_hi;
4749         u32 cp_hqd_active;
4750         u32 cp_hqd_vmid;
4751         u32 cp_hqd_persistent_state;
4752         u32 cp_hqd_pipe_priority;
4753         u32 cp_hqd_queue_priority;
4754         u32 cp_hqd_quantum;
4755         u32 cp_hqd_pq_base;
4756         u32 cp_hqd_pq_base_hi;
4757         u32 cp_hqd_pq_rptr;
4758         u32 cp_hqd_pq_rptr_report_addr;
4759         u32 cp_hqd_pq_rptr_report_addr_hi;
4760         u32 cp_hqd_pq_wptr_poll_addr;
4761         u32 cp_hqd_pq_wptr_poll_addr_hi;
4762         u32 cp_hqd_pq_doorbell_control;
4763         u32 cp_hqd_pq_wptr;
4764         u32 cp_hqd_pq_control;
4765         u32 cp_hqd_ib_base_addr;
4766         u32 cp_hqd_ib_base_addr_hi;
4767         u32 cp_hqd_ib_rptr;
4768         u32 cp_hqd_ib_control;
4769         u32 cp_hqd_iq_timer;
4770         u32 cp_hqd_iq_rptr;
4771         u32 cp_hqd_dequeue_request;
4772         u32 cp_hqd_dma_offload;
4773         u32 cp_hqd_sema_cmd;
4774         u32 cp_hqd_msg_type;
4775         u32 cp_hqd_atomic0_preop_lo;
4776         u32 cp_hqd_atomic0_preop_hi;
4777         u32 cp_hqd_atomic1_preop_lo;
4778         u32 cp_hqd_atomic1_preop_hi;
4779         u32 cp_hqd_hq_scheduler0;
4780         u32 cp_hqd_hq_scheduler1;
4781         u32 cp_mqd_control;
4782 };
4783
4784 struct bonaire_mqd
4785 {
4786         u32 header;
4787         u32 dispatch_initiator;
4788         u32 dimensions[3];
4789         u32 start_idx[3];
4790         u32 num_threads[3];
4791         u32 pipeline_stat_enable;
4792         u32 perf_counter_enable;
4793         u32 pgm[2];
4794         u32 tba[2];
4795         u32 tma[2];
4796         u32 pgm_rsrc[2];
4797         u32 vmid;
4798         u32 resource_limits;
4799         u32 static_thread_mgmt01[2];
4800         u32 tmp_ring_size;
4801         u32 static_thread_mgmt23[2];
4802         u32 restart[3];
4803         u32 thread_trace_enable;
4804         u32 reserved1;
4805         u32 user_data[16];
4806         u32 vgtcs_invoke_count[2];
4807         struct hqd_registers queue_state;
4808         u32 dequeue_cntr;
4809         u32 interrupt_queue[64];
4810 };
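/*
 * The bonaire_mqd layout above is the in-memory queue descriptor (MQD) used
 * for a compute queue; its queue_state member shadows the CP_HQD_* registers.
 * cik_cp_compute_resume() below fills in one MQD per compute ring and
 * programs the same values into the hardware queue descriptor registers.
 */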
4811
4812 /**
4813  * cik_cp_compute_resume - setup the compute queue registers
4814  *
4815  * @rdev: radeon_device pointer
4816  *
4817  * Program the compute queues and test them to make sure they
4818  * are working.
4819  * Returns 0 for success, error for failure.
4820  */
4821 static int cik_cp_compute_resume(struct radeon_device *rdev)
4822 {
4823         int r, i, j, idx;
4824         u32 tmp;
4825         bool use_doorbell = true;
4826         u64 hqd_gpu_addr;
4827         u64 mqd_gpu_addr;
4828         u64 eop_gpu_addr;
4829         u64 wb_gpu_addr;
4830         u32 *buf;
4831         struct bonaire_mqd *mqd;
4832
4833         r = cik_cp_compute_start(rdev);
4834         if (r)
4835                 return r;
4836
4837         /* fix up chicken bits */
4838         tmp = RREG32(CP_CPF_DEBUG);
4839         tmp |= (1 << 23);
4840         WREG32(CP_CPF_DEBUG, tmp);
4841
4842         /* init the pipes */
4843         mutex_lock(&rdev->srbm_mutex);
4844
4845         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4846
4847         cik_srbm_select(rdev, 0, 0, 0, 0);
4848
4849         /* write the EOP addr */
4850         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4851         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4852
4853         /* set the VMID assigned */
4854         WREG32(CP_HPD_EOP_VMID, 0);
4855
4856         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4857         tmp = RREG32(CP_HPD_EOP_CONTROL);
4858         tmp &= ~EOP_SIZE_MASK;
4859         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4860         WREG32(CP_HPD_EOP_CONTROL, tmp);
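        /*
         * For example, with MEC_HPD_SIZE = 2048 bytes (512 dwords),
         * order_base_2(2048 / 8) = 8 is written here, and 2^(8+1) = 512
         * dwords matches the per-pipe EOP buffer size.
         */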
4861
4862         mutex_unlock(&rdev->srbm_mutex);
4863
4864         /* init the queues.  Just two for now. */
4865         for (i = 0; i < 2; i++) {
4866                 if (i == 0)
4867                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4868                 else
4869                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4870
4871                 if (rdev->ring[idx].mqd_obj == NULL) {
4872                         r = radeon_bo_create(rdev,
4873                                              sizeof(struct bonaire_mqd),
4874                                              PAGE_SIZE, true,
4875                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4876                                              NULL, &rdev->ring[idx].mqd_obj);
4877                         if (r) {
4878                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4879                                 return r;
4880                         }
4881                 }
4882
4883                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4884                 if (unlikely(r != 0)) {
4885                         cik_cp_compute_fini(rdev);
4886                         return r;
4887                 }
4888                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4889                                   &mqd_gpu_addr);
4890                 if (r) {
4891                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4892                         cik_cp_compute_fini(rdev);
4893                         return r;
4894                 }
4895                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4896                 if (r) {
4897                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4898                         cik_cp_compute_fini(rdev);
4899                         return r;
4900                 }
4901
4902                 /* init the mqd struct */
4903                 memset(buf, 0, sizeof(struct bonaire_mqd));
4904
4905                 mqd = (struct bonaire_mqd *)buf;
4906                 mqd->header = 0xC0310800;
4907                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4908                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4909                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4910                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4911
4912                 mutex_lock(&rdev->srbm_mutex);
4913                 cik_srbm_select(rdev, rdev->ring[idx].me,
4914                                 rdev->ring[idx].pipe,
4915                                 rdev->ring[idx].queue, 0);
4916
4917                 /* disable wptr polling */
4918                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4919                 tmp &= ~WPTR_POLL_EN;
4920                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4921
4922                 /* enable doorbell? */
4923                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4924                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4925                 if (use_doorbell)
4926                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4927                 else
4928                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4929                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4930                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4931
4932                 /* disable the queue if it's active */
4933                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4934                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4935                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4936                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4937                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4938                         for (j = 0; j < rdev->usec_timeout; j++) {
4939                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4940                                         break;
4941                                 udelay(1);
4942                         }
4943                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4944                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4945                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4946                 }
4947
4948                 /* set the pointer to the MQD */
4949                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4950                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4951                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4952                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4953                 /* set MQD vmid to 0 */
4954                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4955                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4956                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4957
4958                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4959                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4960                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4961                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4962                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4963                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4964
4965                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4966                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4967                 mqd->queue_state.cp_hqd_pq_control &=
4968                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4969
4970                 mqd->queue_state.cp_hqd_pq_control |=
4971                         order_base_2(rdev->ring[idx].ring_size / 8);
4972                 mqd->queue_state.cp_hqd_pq_control |=
4973                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4974 #ifdef __BIG_ENDIAN
4975                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4976 #endif
4977                 mqd->queue_state.cp_hqd_pq_control &=
4978                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4979                 mqd->queue_state.cp_hqd_pq_control |=
4980                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4981                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
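                /*
                 * Encoding example: a 1 MiB compute ring gives a queue size
                 * field of order_base_2(1048576 / 8) = 17, and with 4 KiB
                 * GPU pages the rptr block size field is
                 * order_base_2(4096 / 8) = 9, placed at bit 8.
                 */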
4982
4983                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4984                 if (i == 0)
4985                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4986                 else
4987                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4988                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4989                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4990                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4991                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4992                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4993
4994                 /* set the wb address whether it's enabled or not */
4995                 if (i == 0)
4996                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4997                 else
4998                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4999                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5000                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5001                         upper_32_bits(wb_gpu_addr) & 0xffff;
5002                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5003                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5004                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5005                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5006
5007                 /* enable the doorbell if requested */
5008                 if (use_doorbell) {
5009                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5010                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5011                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5012                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5013                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5014                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5015                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5016                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5017
5018                 } else {
5019                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5020                 }
5021                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5022                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5023
5024                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5025                 rdev->ring[idx].wptr = 0;
5026                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5027                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5028                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5029
5030                 /* set the vmid for the queue */
5031                 mqd->queue_state.cp_hqd_vmid = 0;
5032                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5033
5034                 /* activate the queue */
5035                 mqd->queue_state.cp_hqd_active = 1;
5036                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5037
5038                 cik_srbm_select(rdev, 0, 0, 0, 0);
5039                 mutex_unlock(&rdev->srbm_mutex);
5040
5041                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5042                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5043
5044                 rdev->ring[idx].ready = true;
5045                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5046                 if (r)
5047                         rdev->ring[idx].ready = false;
5048         }
5049
5050         return 0;
5051 }
5052
5053 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5054 {
5055         cik_cp_gfx_enable(rdev, enable);
5056         cik_cp_compute_enable(rdev, enable);
5057 }
5058
5059 static int cik_cp_load_microcode(struct radeon_device *rdev)
5060 {
5061         int r;
5062
5063         r = cik_cp_gfx_load_microcode(rdev);
5064         if (r)
5065                 return r;
5066         r = cik_cp_compute_load_microcode(rdev);
5067         if (r)
5068                 return r;
5069
5070         return 0;
5071 }
5072
5073 static void cik_cp_fini(struct radeon_device *rdev)
5074 {
5075         cik_cp_gfx_fini(rdev);
5076         cik_cp_compute_fini(rdev);
5077 }
5078
5079 static int cik_cp_resume(struct radeon_device *rdev)
5080 {
5081         int r;
5082
5083         cik_enable_gui_idle_interrupt(rdev, false);
5084
5085         r = cik_cp_load_microcode(rdev);
5086         if (r)
5087                 return r;
5088
5089         r = cik_cp_gfx_resume(rdev);
5090         if (r)
5091                 return r;
5092         r = cik_cp_compute_resume(rdev);
5093         if (r)
5094                 return r;
5095
5096         cik_enable_gui_idle_interrupt(rdev, true);
5097
5098         return 0;
5099 }
5100
5101 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5102 {
5103         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5104                 RREG32(GRBM_STATUS));
5105         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5106                 RREG32(GRBM_STATUS2));
5107         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5108                 RREG32(GRBM_STATUS_SE0));
5109         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5110                 RREG32(GRBM_STATUS_SE1));
5111         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5112                 RREG32(GRBM_STATUS_SE2));
5113         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5114                 RREG32(GRBM_STATUS_SE3));
5115         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5116                 RREG32(SRBM_STATUS));
5117         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5118                 RREG32(SRBM_STATUS2));
5119         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5120                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5121         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5122                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5123         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5124         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5125                  RREG32(CP_STALLED_STAT1));
5126         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5127                  RREG32(CP_STALLED_STAT2));
5128         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5129                  RREG32(CP_STALLED_STAT3));
5130         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5131                  RREG32(CP_CPF_BUSY_STAT));
5132         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5133                  RREG32(CP_CPF_STALLED_STAT1));
5134         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5135         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5136         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5137                  RREG32(CP_CPC_STALLED_STAT1));
5138         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5139 }
5140
5141 /**
5142  * cik_gpu_check_soft_reset - check which blocks are busy
5143  *
5144  * @rdev: radeon_device pointer
5145  *
5146  * Check which blocks are busy and return the relevant reset
5147  * mask to be used by cik_gpu_soft_reset().
5148  * Returns a mask of the blocks to be reset.
5149  */
5150 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5151 {
5152         u32 reset_mask = 0;
5153         u32 tmp;
5154
5155         /* GRBM_STATUS */
5156         tmp = RREG32(GRBM_STATUS);
5157         if (tmp & (PA_BUSY | SC_BUSY |
5158                    BCI_BUSY | SX_BUSY |
5159                    TA_BUSY | VGT_BUSY |
5160                    DB_BUSY | CB_BUSY |
5161                    GDS_BUSY | SPI_BUSY |
5162                    IA_BUSY | IA_BUSY_NO_DMA))
5163                 reset_mask |= RADEON_RESET_GFX;
5164
5165         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5166                 reset_mask |= RADEON_RESET_CP;
5167
5168         /* GRBM_STATUS2 */
5169         tmp = RREG32(GRBM_STATUS2);
5170         if (tmp & RLC_BUSY)
5171                 reset_mask |= RADEON_RESET_RLC;
5172
5173         /* SDMA0_STATUS_REG */
5174         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5175         if (!(tmp & SDMA_IDLE))
5176                 reset_mask |= RADEON_RESET_DMA;
5177
5178         /* SDMA1_STATUS_REG */
5179         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5180         if (!(tmp & SDMA_IDLE))
5181                 reset_mask |= RADEON_RESET_DMA1;
5182
5183         /* SRBM_STATUS2 */
5184         tmp = RREG32(SRBM_STATUS2);
5185         if (tmp & SDMA_BUSY)
5186                 reset_mask |= RADEON_RESET_DMA;
5187
5188         if (tmp & SDMA1_BUSY)
5189                 reset_mask |= RADEON_RESET_DMA1;
5190
5191         /* SRBM_STATUS */
5192         tmp = RREG32(SRBM_STATUS);
5193
5194         if (tmp & IH_BUSY)
5195                 reset_mask |= RADEON_RESET_IH;
5196
5197         if (tmp & SEM_BUSY)
5198                 reset_mask |= RADEON_RESET_SEM;
5199
5200         if (tmp & GRBM_RQ_PENDING)
5201                 reset_mask |= RADEON_RESET_GRBM;
5202
5203         if (tmp & VMC_BUSY)
5204                 reset_mask |= RADEON_RESET_VMC;
5205
5206         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5207                    MCC_BUSY | MCD_BUSY))
5208                 reset_mask |= RADEON_RESET_MC;
5209
5210         if (evergreen_is_display_hung(rdev))
5211                 reset_mask |= RADEON_RESET_DISPLAY;
5212
5213         /* Skip MC reset as it's most likely not hung, just busy */
5214         if (reset_mask & RADEON_RESET_MC) {
5215                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5216                 reset_mask &= ~RADEON_RESET_MC;
5217         }
5218
5219         return reset_mask;
5220 }
5221
5222 /**
5223  * cik_gpu_soft_reset - soft reset GPU
5224  *
5225  * @rdev: radeon_device pointer
5226  * @reset_mask: mask of which blocks to reset
5227  *
5228  * Soft reset the blocks specified in @reset_mask.
5229  */
5230 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5231 {
5232         struct evergreen_mc_save save;
5233         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5234         u32 tmp;
5235
5236         if (reset_mask == 0)
5237                 return;
5238
5239         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5240
5241         cik_print_gpu_status_regs(rdev);
5242         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5243                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5244         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5245                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5246
5247         /* disable CG/PG */
5248         cik_fini_pg(rdev);
5249         cik_fini_cg(rdev);
5250
5251         /* stop the rlc */
5252         cik_rlc_stop(rdev);
5253
5254         /* Disable GFX parsing/prefetching */
5255         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5256
5257         /* Disable MEC parsing/prefetching */
5258         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5259
5260         if (reset_mask & RADEON_RESET_DMA) {
5261                 /* sdma0 */
5262                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5263                 tmp |= SDMA_HALT;
5264                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5265         }
5266         if (reset_mask & RADEON_RESET_DMA1) {
5267                 /* sdma1 */
5268                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5269                 tmp |= SDMA_HALT;
5270                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5271         }
5272
5273         evergreen_mc_stop(rdev, &save);
5274         if (evergreen_mc_wait_for_idle(rdev)) {
5275                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5276         }
5277
5278         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5279                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5280
5281         if (reset_mask & RADEON_RESET_CP) {
5282                 grbm_soft_reset |= SOFT_RESET_CP;
5283
5284                 srbm_soft_reset |= SOFT_RESET_GRBM;
5285         }
5286
5287         if (reset_mask & RADEON_RESET_DMA)
5288                 srbm_soft_reset |= SOFT_RESET_SDMA;
5289
5290         if (reset_mask & RADEON_RESET_DMA1)
5291                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5292
5293         if (reset_mask & RADEON_RESET_DISPLAY)
5294                 srbm_soft_reset |= SOFT_RESET_DC;
5295
5296         if (reset_mask & RADEON_RESET_RLC)
5297                 grbm_soft_reset |= SOFT_RESET_RLC;
5298
5299         if (reset_mask & RADEON_RESET_SEM)
5300                 srbm_soft_reset |= SOFT_RESET_SEM;
5301
5302         if (reset_mask & RADEON_RESET_IH)
5303                 srbm_soft_reset |= SOFT_RESET_IH;
5304
5305         if (reset_mask & RADEON_RESET_GRBM)
5306                 srbm_soft_reset |= SOFT_RESET_GRBM;
5307
5308         if (reset_mask & RADEON_RESET_VMC)
5309                 srbm_soft_reset |= SOFT_RESET_VMC;
5310
5311         if (!(rdev->flags & RADEON_IS_IGP)) {
5312                 if (reset_mask & RADEON_RESET_MC)
5313                         srbm_soft_reset |= SOFT_RESET_MC;
5314         }
5315
5316         if (grbm_soft_reset) {
5317                 tmp = RREG32(GRBM_SOFT_RESET);
5318                 tmp |= grbm_soft_reset;
5319                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5320                 WREG32(GRBM_SOFT_RESET, tmp);
5321                 tmp = RREG32(GRBM_SOFT_RESET);
5322
5323                 udelay(50);
5324
5325                 tmp &= ~grbm_soft_reset;
5326                 WREG32(GRBM_SOFT_RESET, tmp);
5327                 tmp = RREG32(GRBM_SOFT_RESET);
5328         }
5329
5330         if (srbm_soft_reset) {
5331                 tmp = RREG32(SRBM_SOFT_RESET);
5332                 tmp |= srbm_soft_reset;
5333                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5334                 WREG32(SRBM_SOFT_RESET, tmp);
5335                 tmp = RREG32(SRBM_SOFT_RESET);
5336
5337                 udelay(50);
5338
5339                 tmp &= ~srbm_soft_reset;
5340                 WREG32(SRBM_SOFT_RESET, tmp);
5341                 tmp = RREG32(SRBM_SOFT_RESET);
5342         }
5343
5344         /* Wait a little for things to settle down */
5345         udelay(50);
5346
5347         evergreen_mc_resume(rdev, &save);
5348         udelay(50);
5349
5350         cik_print_gpu_status_regs(rdev);
5351 }
5352
5353 struct kv_reset_save_regs {
5354         u32 gmcon_reng_execute;
5355         u32 gmcon_misc;
5356         u32 gmcon_misc3;
5357 };
5358
5359 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5360                                    struct kv_reset_save_regs *save)
5361 {
5362         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5363         save->gmcon_misc = RREG32(GMCON_MISC);
5364         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5365
5366         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5367         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5368                                                 STCTRL_STUTTER_EN));
5369 }
5370
5371 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5372                                       struct kv_reset_save_regs *save)
5373 {
5374         int i;
5375
5376         WREG32(GMCON_PGFSM_WRITE, 0);
5377         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5378
5379         for (i = 0; i < 5; i++)
5380                 WREG32(GMCON_PGFSM_WRITE, 0);
5381
5382         WREG32(GMCON_PGFSM_WRITE, 0);
5383         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5384
5385         for (i = 0; i < 5; i++)
5386                 WREG32(GMCON_PGFSM_WRITE, 0);
5387
5388         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5389         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5390
5391         for (i = 0; i < 5; i++)
5392                 WREG32(GMCON_PGFSM_WRITE, 0);
5393
5394         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5395         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5396
5397         for (i = 0; i < 5; i++)
5398                 WREG32(GMCON_PGFSM_WRITE, 0);
5399
5400         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5401         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5402
5403         for (i = 0; i < 5; i++)
5404                 WREG32(GMCON_PGFSM_WRITE, 0);
5405
5406         WREG32(GMCON_PGFSM_WRITE, 0);
5407         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5408
5409         for (i = 0; i < 5; i++)
5410                 WREG32(GMCON_PGFSM_WRITE, 0);
5411
5412         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5413         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5414
5415         for (i = 0; i < 5; i++)
5416                 WREG32(GMCON_PGFSM_WRITE, 0);
5417
5418         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5419         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5420
5421         for (i = 0; i < 5; i++)
5422                 WREG32(GMCON_PGFSM_WRITE, 0);
5423
5424         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5425         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5426
5427         for (i = 0; i < 5; i++)
5428                 WREG32(GMCON_PGFSM_WRITE, 0);
5429
5430         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5431         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5432
5433         for (i = 0; i < 5; i++)
5434                 WREG32(GMCON_PGFSM_WRITE, 0);
5435
5436         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5437         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5438
5439         WREG32(GMCON_MISC3, save->gmcon_misc3);
5440         WREG32(GMCON_MISC, save->gmcon_misc);
5441         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5442 }
5443
5444 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5445 {
5446         struct evergreen_mc_save save;
5447         struct kv_reset_save_regs kv_save = { 0 };
5448         u32 tmp, i;
5449
5450         dev_info(rdev->dev, "GPU pci config reset\n");
5451
5452         /* disable dpm? */
5453
5454         /* disable cg/pg */
5455         cik_fini_pg(rdev);
5456         cik_fini_cg(rdev);
5457
5458         /* Disable GFX parsing/prefetching */
5459         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5460
5461         /* Disable MEC parsing/prefetching */
5462         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5463
5464         /* sdma0 */
5465         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5466         tmp |= SDMA_HALT;
5467         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5468         /* sdma1 */
5469         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5470         tmp |= SDMA_HALT;
5471         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5472         /* XXX other engines? */
5473
5474         /* halt the rlc, disable cp internal ints */
5475         cik_rlc_stop(rdev);
5476
5477         udelay(50);
5478
5479         /* disable mem access */
5480         evergreen_mc_stop(rdev, &save);
5481         if (evergreen_mc_wait_for_idle(rdev)) {
5482                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5483         }
5484
5485         if (rdev->flags & RADEON_IS_IGP)
5486                 kv_save_regs_for_reset(rdev, &kv_save);
5487
5488         /* disable BM */
5489         pci_clear_master(rdev->pdev);
5490         /* reset */
5491         radeon_pci_config_reset(rdev);
5492
5493         udelay(100);
5494
5495         /* wait for asic to come out of reset */
5496         for (i = 0; i < rdev->usec_timeout; i++) {
5497                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5498                         break;
5499                 udelay(1);
5500         }
5501
5502         /* does asic init need to be run first??? */
5503         if (rdev->flags & RADEON_IS_IGP)
5504                 kv_restore_regs_for_reset(rdev, &kv_save);
5505 }
5506
5507 /**
5508  * cik_asic_reset - soft reset GPU
5509  *
5510  * @rdev: radeon_device pointer
5511  *
5512  * Look up which blocks are hung and attempt
5513  * to reset them.
5514  * Returns 0 for success.
5515  */
5516 int cik_asic_reset(struct radeon_device *rdev)
5517 {
5518         u32 reset_mask;
5519
5520         reset_mask = cik_gpu_check_soft_reset(rdev);
5521
5522         if (reset_mask)
5523                 r600_set_bios_scratch_engine_hung(rdev, true);
5524
5525         /* try soft reset */
5526         cik_gpu_soft_reset(rdev, reset_mask);
5527
5528         reset_mask = cik_gpu_check_soft_reset(rdev);
5529
5530         /* try pci config reset */
5531         if (reset_mask && radeon_hard_reset)
5532                 cik_gpu_pci_config_reset(rdev);
5533
5534         reset_mask = cik_gpu_check_soft_reset(rdev);
5535
5536         if (!reset_mask)
5537                 r600_set_bios_scratch_engine_hung(rdev, false);
5538
5539         return 0;
5540 }
5541
5542 /**
5543  * cik_gfx_is_lockup - check if the 3D engine is locked up
5544  *
5545  * @rdev: radeon_device pointer
5546  * @ring: radeon_ring structure holding ring information
5547  *
5548  * Check if the 3D engine is locked up (CIK).
5549  * Returns true if the engine is locked, false if not.
5550  */
5551 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5552 {
5553         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5554
5555         if (!(reset_mask & (RADEON_RESET_GFX |
5556                             RADEON_RESET_COMPUTE |
5557                             RADEON_RESET_CP))) {
5558                 radeon_ring_lockup_update(rdev, ring);
5559                 return false;
5560         }
5561         return radeon_ring_test_lockup(rdev, ring);
5562 }
5563
5564 /* MC */
5565 /**
5566  * cik_mc_program - program the GPU memory controller
5567  *
5568  * @rdev: radeon_device pointer
5569  *
5570  * Set the location of vram, gart, and AGP in the GPU's
5571  * physical address space (CIK).
5572  */
5573 static void cik_mc_program(struct radeon_device *rdev)
5574 {
5575         struct evergreen_mc_save save;
5576         u32 tmp;
5577         int i, j;
5578
5579         /* Initialize HDP */
5580         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5581                 WREG32((0x2c14 + j), 0x00000000);
5582                 WREG32((0x2c18 + j), 0x00000000);
5583                 WREG32((0x2c1c + j), 0x00000000);
5584                 WREG32((0x2c20 + j), 0x00000000);
5585                 WREG32((0x2c24 + j), 0x00000000);
5586         }
5587         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5588
5589         evergreen_mc_stop(rdev, &save);
5590         if (radeon_mc_wait_for_idle(rdev)) {
5591                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592         }
5593         /* Lockout access through VGA aperture*/
5594         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5595         /* Update configuration */
5596         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5597                rdev->mc.vram_start >> 12);
5598         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5599                rdev->mc.vram_end >> 12);
5600         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5601                rdev->vram_scratch.gpu_addr >> 12);
5602         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5603         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5604         WREG32(MC_VM_FB_LOCATION, tmp);
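        /*
         * FB_LOCATION packs the top and base of the VRAM aperture in 16 MiB
         * (1 << 24) units: e.g. 4 GiB of VRAM starting at address 0 would be
         * programmed as 0x00FF0000.
         */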
5605         /* XXX double check these! */
5606         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5607         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5608         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5609         WREG32(MC_VM_AGP_BASE, 0);
5610         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5611         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5612         if (radeon_mc_wait_for_idle(rdev)) {
5613                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5614         }
5615         evergreen_mc_resume(rdev, &save);
5616         /* we need to own VRAM, so turn off the VGA renderer here
5617          * to stop it overwriting our objects */
5618         rv515_vga_render_disable(rdev);
5619 }
5620
5621 /**
5622  * cik_mc_init - initialize the memory controller driver params
5623  *
5624  * @rdev: radeon_device pointer
5625  *
5626  * Look up the amount of vram, vram width, and decide how to place
5627  * vram and gart within the GPU's physical address space (CIK).
5628  * Returns 0 for success.
5629  */
5630 static int cik_mc_init(struct radeon_device *rdev)
5631 {
5632         u32 tmp;
5633         int chansize, numchan;
5634
5635         /* Get VRAM information */
5636         rdev->mc.vram_is_ddr = true;
5637         tmp = RREG32(MC_ARB_RAMCFG);
5638         if (tmp & CHANSIZE_MASK) {
5639                 chansize = 64;
5640         } else {
5641                 chansize = 32;
5642         }
5643         tmp = RREG32(MC_SHARED_CHMAP);
5644         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5645         case 0:
5646         default:
5647                 numchan = 1;
5648                 break;
5649         case 1:
5650                 numchan = 2;
5651                 break;
5652         case 2:
5653                 numchan = 4;
5654                 break;
5655         case 3:
5656                 numchan = 8;
5657                 break;
5658         case 4:
5659                 numchan = 3;
5660                 break;
5661         case 5:
5662                 numchan = 6;
5663                 break;
5664         case 6:
5665                 numchan = 10;
5666                 break;
5667         case 7:
5668                 numchan = 12;
5669                 break;
5670         case 8:
5671                 numchan = 16;
5672                 break;
5673         }
5674         rdev->mc.vram_width = numchan * chansize;
5675         /* Could aper size report 0 ? */
5676         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5677         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5678         /* size in MB on CIK */
5679         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5681         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5682         si_vram_gtt_location(rdev, &rdev->mc);
5683         radeon_update_bandwidth_info(rdev);
5684
5685         return 0;
5686 }
5687
5688 /*
5689  * GART
5690  * VMID 0 is the physical GPU addresses as used by the kernel.
5691  * VMIDs 1-15 are used for userspace clients and are handled
5692  * by the radeon vm/hsa code.
5693  */
5694 /**
5695  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5696  *
5697  * @rdev: radeon_device pointer
5698  *
5699  * Flush the TLB for the VMID 0 page table (CIK).
5700  */
5701 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5702 {
5703         /* flush hdp cache */
5704         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5705
5706         /* bits 0-15 are the VM contexts0-15 */
5707         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5708 }
5709
5710 /**
5711  * cik_pcie_gart_enable - gart enable
5712  *
5713  * @rdev: radeon_device pointer
5714  *
5715  * This sets up the TLBs, programs the page tables for VMID0,
5716  * sets up the hw for VMIDs 1-15 which are allocated on
5717  * demand, and sets up the global locations for the LDS, GDS,
5718  * and GPUVM for FSA64 clients (CIK).
5719  * Returns 0 for success, errors for failure.
5720  */
5721 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5722 {
5723         int r, i;
5724
5725         if (rdev->gart.robj == NULL) {
5726                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5727                 return -EINVAL;
5728         }
5729         r = radeon_gart_table_vram_pin(rdev);
5730         if (r)
5731                 return r;
5732         /* Setup TLB control */
5733         WREG32(MC_VM_MX_L1_TLB_CNTL,
5734                (0xA << 7) |
5735                ENABLE_L1_TLB |
5736                ENABLE_L1_FRAGMENT_PROCESSING |
5737                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5738                ENABLE_ADVANCED_DRIVER_MODEL |
5739                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5740         /* Setup L2 cache */
5741         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5742                ENABLE_L2_FRAGMENT_PROCESSING |
5743                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5744                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5745                EFFECTIVE_L2_QUEUE_SIZE(7) |
5746                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5747         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5748         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5749                BANK_SELECT(4) |
5750                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5751         /* setup context0 */
5752         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5753         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5754         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5755         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5756                         (u32)(rdev->dummy_page.addr >> 12));
5757         WREG32(VM_CONTEXT0_CNTL2, 0);
5758         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5759                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5760
5761         WREG32(0x15D4, 0);
5762         WREG32(0x15D8, 0);
5763         WREG32(0x15DC, 0);
5764
5765         /* restore context1-15 */
5766         /* set vm size, must be a multiple of 4 */
5767         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5768         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5769         for (i = 1; i < 16; i++) {
5770                 if (i < 8)
5771                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5772                                rdev->vm_manager.saved_table_addr[i]);
5773                 else
5774                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5775                                rdev->vm_manager.saved_table_addr[i]);
5776         }
5777
5778         /* enable context1-15 */
5779         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5780                (u32)(rdev->dummy_page.addr >> 12));
5781         WREG32(VM_CONTEXT1_CNTL2, 4);
5782         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5783                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5784                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5785                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5786                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5787                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5788                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5789                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5790                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5791                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5792                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5793                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5794                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5795                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
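        /*
         * Contexts 1-15 use a one-level page directory (PAGE_TABLE_DEPTH(1));
         * the block size field is programmed as radeon_vm_block_size - 9, so
         * e.g. a block size parameter of 12 yields a field value of 3.  All
         * fault types both raise an interrupt and redirect to the dummy page
         * set up above.
         */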
5796
5797         if (rdev->family == CHIP_KAVERI) {
5798                 u32 tmp = RREG32(CHUB_CONTROL);
5799                 tmp &= ~BYPASS_VM;
5800                 WREG32(CHUB_CONTROL, tmp);
5801         }
5802
5803         /* XXX SH_MEM regs */
5804         /* where to put LDS, scratch, GPUVM in FSA64 space */
5805         mutex_lock(&rdev->srbm_mutex);
5806         for (i = 0; i < 16; i++) {
5807                 cik_srbm_select(rdev, 0, 0, 0, i);
5808                 /* CP and shaders */
5809                 WREG32(SH_MEM_CONFIG, 0);
5810                 WREG32(SH_MEM_APE1_BASE, 1);
5811                 WREG32(SH_MEM_APE1_LIMIT, 0);
5812                 WREG32(SH_MEM_BASES, 0);
5813                 /* SDMA GFX */
5814                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5815                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5816                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5817                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5818                 /* XXX SDMA RLC - todo */
5819         }
5820         cik_srbm_select(rdev, 0, 0, 0, 0);
5821         mutex_unlock(&rdev->srbm_mutex);
5822
5823         cik_pcie_gart_tlb_flush(rdev);
5824         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5825                  (unsigned)(rdev->mc.gtt_size >> 20),
5826                  (unsigned long long)rdev->gart.table_addr);
5827         rdev->gart.ready = true;
5828         return 0;
5829 }
5830
5831 /**
5832  * cik_pcie_gart_disable - gart disable
5833  *
5834  * @rdev: radeon_device pointer
5835  *
5836  * This disables all VM page tables (CIK).
5837  */
5838 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5839 {
5840         unsigned i;
5841
5842         for (i = 1; i < 16; ++i) {
5843                 uint32_t reg;
5844                 if (i < 8)
5845                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5846                 else
5847                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5848                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5849         }
5850
5851         /* Disable all tables */
5852         WREG32(VM_CONTEXT0_CNTL, 0);
5853         WREG32(VM_CONTEXT1_CNTL, 0);
5854         /* Setup TLB control */
5855         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5856                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5857         /* Setup L2 cache */
5858         WREG32(VM_L2_CNTL,
5859                ENABLE_L2_FRAGMENT_PROCESSING |
5860                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5861                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5862                EFFECTIVE_L2_QUEUE_SIZE(7) |
5863                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5864         WREG32(VM_L2_CNTL2, 0);
5865         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5866                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5867         radeon_gart_table_vram_unpin(rdev);
5868 }
5869
5870 /**
5871  * cik_pcie_gart_fini - vm fini callback
5872  *
5873  * @rdev: radeon_device pointer
5874  *
5875  * Tears down the driver GART/VM setup (CIK).
5876  */
5877 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5878 {
5879         cik_pcie_gart_disable(rdev);
5880         radeon_gart_table_vram_free(rdev);
5881         radeon_gart_fini(rdev);
5882 }
5883
5884 /* vm parser */
5885 /**
5886  * cik_ib_parse - vm ib_parse callback
5887  *
5888  * @rdev: radeon_device pointer
5889  * @ib: indirect buffer pointer
5890  *
5891  * CIK uses hw IB checking so this is a nop (CIK).
5892  */
5893 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5894 {
5895         return 0;
5896 }
5897
5898 /*
5899  * vm
5900  * VMID 0 is the physical GPU addresses as used by the kernel.
5901  * VMIDs 1-15 are used for userspace clients and are handled
5902  * by the radeon vm/hsa code.
5903  */
5904 /**
5905  * cik_vm_init - cik vm init callback
5906  *
5907  * @rdev: radeon_device pointer
5908  *
5909  * Inits cik specific vm parameters (number of VMs, base of vram for
5910  * VMIDs 1-15) (CIK).
5911  * Returns 0 for success.
5912  */
5913 int cik_vm_init(struct radeon_device *rdev)
5914 {
5915         /*
5916          * number of VMs
5917          * VMID 0 is reserved for System
5918          * radeon graphics/compute will use VMIDs 1-7
5919          * amdkfd will use VMIDs 8-15
5920          */
5921         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5922         /* base offset of vram pages */
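        /* On IGP/APU parts the carve-out VRAM base comes from MC_VM_FB_OFFSET,
         * which is expressed in 4 MiB units (hence the shift by 22 below);
         * discrete parts use an offset of 0.
         */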
5923         if (rdev->flags & RADEON_IS_IGP) {
5924                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5925                 tmp <<= 22;
5926                 rdev->vm_manager.vram_base_offset = tmp;
5927         } else
5928                 rdev->vm_manager.vram_base_offset = 0;
5929
5930         return 0;
5931 }
5932
5933 /**
5934  * cik_vm_fini - cik vm fini callback
5935  *
5936  * @rdev: radeon_device pointer
5937  *
5938  * Tear down any asic specific VM setup (CIK).
5939  */
5940 void cik_vm_fini(struct radeon_device *rdev)
5941 {
5942 }
5943
5944 /**
5945  * cik_vm_decode_fault - print human readable fault info
5946  *
5947  * @rdev: radeon_device pointer
5948  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5949  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5950  *
5951  * Print human readable fault information (CIK).
5952  */
5953 static void cik_vm_decode_fault(struct radeon_device *rdev,
5954                                 u32 status, u32 addr, u32 mc_client)
5955 {
5956         u32 mc_id;
5957         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5958         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5959         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5960                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
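        /* mc_client is a packed four-character ASCII tag naming the memory
         * client that faulted; unpack it into a printable string. */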
5961
5962         if (rdev->family == CHIP_HAWAII)
5963                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5964         else
5965                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5966
5967         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5968                protections, vmid, addr,
5969                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5970                block, mc_client, mc_id);
5971 }
5972
5973 /**
5974  * cik_vm_flush - cik vm flush using the CP
5975  *
5976  * @rdev: radeon_device pointer
5977  *
5978  * Update the page table base and flush the VM TLB
5979  * using the CP (CIK).
5980  */
5981 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5982                   unsigned vm_id, uint64_t pd_addr)
5983 {
5984         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5985
5986         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5987         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5988                                  WRITE_DATA_DST_SEL(0)));
5989         if (vm_id < 8) {
5990                 radeon_ring_write(ring,
5991                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5992         } else {
5993                 radeon_ring_write(ring,
5994                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5995         }
5996         radeon_ring_write(ring, 0);
5997         radeon_ring_write(ring, pd_addr >> 12);
5998
5999         /* update SH_MEM_* regs */
6000         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6001         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6002                                  WRITE_DATA_DST_SEL(0)));
6003         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6004         radeon_ring_write(ring, 0);
6005         radeon_ring_write(ring, VMID(vm_id));
6006
6007         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6008         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6009                                  WRITE_DATA_DST_SEL(0)));
6010         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6011         radeon_ring_write(ring, 0);
6012
6013         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6014         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6015         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6016         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6017
6018         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6019         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6020                                  WRITE_DATA_DST_SEL(0)));
6021         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6022         radeon_ring_write(ring, 0);
6023         radeon_ring_write(ring, VMID(0));
6024
6025         /* HDP flush */
6026         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6027
6028         /* bits 0-15 are the VM contexts0-15 */
6029         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6030         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6031                                  WRITE_DATA_DST_SEL(0)));
6032         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6033         radeon_ring_write(ring, 0);
6034         radeon_ring_write(ring, 1 << vm_id);
6035
6036         /* wait for the invalidate to complete */
6037         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6038         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6039                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6040                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6041         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6042         radeon_ring_write(ring, 0);
6043         radeon_ring_write(ring, 0); /* ref */
6044         radeon_ring_write(ring, 0); /* mask */
6045         radeon_ring_write(ring, 0x20); /* poll interval */
6046
6047         /* compute doesn't have PFP */
6048         if (usepfp) {
6049                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6050                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6051                 radeon_ring_write(ring, 0x0);
6052         }
6053 }
6054
6055 /*
6056  * RLC
6057  * The RLC is a multi-purpose microengine that handles a
6058  * variety of functions, the most important of which is
6059  * the interrupt controller.
6060  */
6061 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6062                                           bool enable)
6063 {
6064         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6065
6066         if (enable)
6067                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6068         else
6069                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6070         WREG32(CP_INT_CNTL_RING0, tmp);
6071 }
6072
6073 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6074 {
6075         u32 tmp;
6076
6077         tmp = RREG32(RLC_LB_CNTL);
6078         if (enable)
6079                 tmp |= LOAD_BALANCE_ENABLE;
6080         else
6081                 tmp &= ~LOAD_BALANCE_ENABLE;
6082         WREG32(RLC_LB_CNTL, tmp);
6083 }
6084
6085 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6086 {
6087         u32 i, j, k;
6088         u32 mask;
6089
6090         mutex_lock(&rdev->grbm_idx_mutex);
6091         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6092                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6093                         cik_select_se_sh(rdev, i, j);
6094                         for (k = 0; k < rdev->usec_timeout; k++) {
6095                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6096                                         break;
6097                                 udelay(1);
6098                         }
6099                 }
6100         }
6101         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6102         mutex_unlock(&rdev->grbm_idx_mutex);
6103
6104         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6105         for (k = 0; k < rdev->usec_timeout; k++) {
6106                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6107                         break;
6108                 udelay(1);
6109         }
6110 }
6111
6112 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6113 {
6114         u32 tmp;
6115
6116         tmp = RREG32(RLC_CNTL);
6117         if (tmp != rlc)
6118                 WREG32(RLC_CNTL, rlc);
6119 }
6120
6121 static u32 cik_halt_rlc(struct radeon_device *rdev)
6122 {
6123         u32 data, orig;
6124
6125         orig = data = RREG32(RLC_CNTL);
6126
6127         if (data & RLC_ENABLE) {
6128                 u32 i;
6129
6130                 data &= ~RLC_ENABLE;
6131                 WREG32(RLC_CNTL, data);
6132
6133                 for (i = 0; i < rdev->usec_timeout; i++) {
6134                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6135                                 break;
6136                         udelay(1);
6137                 }
6138
6139                 cik_wait_for_rlc_serdes(rdev);
6140         }
6141
6142         return orig;
6143 }
6144
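/*
 * RLC safe mode handshake: the driver requests safe mode by writing
 * REQ plus a message code to RLC_GPR_REG2, waits until RLC_GPM_STAT
 * reports both GFX power and clock status, and then waits for the RLC
 * to acknowledge by clearing the REQ bit.
 */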
6145 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6146 {
6147         u32 tmp, i, mask;
6148
6149         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6150         WREG32(RLC_GPR_REG2, tmp);
6151
6152         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6153         for (i = 0; i < rdev->usec_timeout; i++) {
6154                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6155                         break;
6156                 udelay(1);
6157         }
6158
6159         for (i = 0; i < rdev->usec_timeout; i++) {
6160                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6161                         break;
6162                 udelay(1);
6163         }
6164 }
6165
6166 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6167 {
6168         u32 tmp;
6169
6170         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6171         WREG32(RLC_GPR_REG2, tmp);
6172 }
6173
6174 /**
6175  * cik_rlc_stop - stop the RLC ME
6176  *
6177  * @rdev: radeon_device pointer
6178  *
6179  * Halt the RLC ME (MicroEngine) (CIK).
6180  */
6181 static void cik_rlc_stop(struct radeon_device *rdev)
6182 {
6183         WREG32(RLC_CNTL, 0);
6184
6185         cik_enable_gui_idle_interrupt(rdev, false);
6186
6187         cik_wait_for_rlc_serdes(rdev);
6188 }
6189
6190 /**
6191  * cik_rlc_start - start the RLC ME
6192  *
6193  * @rdev: radeon_device pointer
6194  *
6195  * Unhalt the RLC ME (MicroEngine) (CIK).
6196  */
6197 static void cik_rlc_start(struct radeon_device *rdev)
6198 {
6199         WREG32(RLC_CNTL, RLC_ENABLE);
6200
6201         cik_enable_gui_idle_interrupt(rdev, true);
6202
6203         udelay(50);
6204 }
6205
6206 /**
6207  * cik_rlc_resume - setup the RLC hw
6208  *
6209  * @rdev: radeon_device pointer
6210  *
6211  * Initialize the RLC registers, load the ucode,
6212  * and start the RLC (CIK).
6213  * Returns 0 for success, -EINVAL if the ucode is not available.
6214  */
6215 static int cik_rlc_resume(struct radeon_device *rdev)
6216 {
6217         u32 i, size, tmp;
6218
6219         if (!rdev->rlc_fw)
6220                 return -EINVAL;
6221
6222         cik_rlc_stop(rdev);
6223
6224         /* disable CG */
6225         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6226         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6227
6228         si_rlc_reset(rdev);
6229
6230         cik_init_pg(rdev);
6231
6232         cik_init_cg(rdev);
6233
6234         WREG32(RLC_LB_CNTR_INIT, 0);
6235         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6236
6237         mutex_lock(&rdev->grbm_idx_mutex);
6238         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6239         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6240         WREG32(RLC_LB_PARAMS, 0x00600408);
6241         WREG32(RLC_LB_CNTL, 0x80000004);
6242         mutex_unlock(&rdev->grbm_idx_mutex);
6243
6244         WREG32(RLC_MC_CNTL, 0);
6245         WREG32(RLC_UCODE_CNTL, 0);
6246
6247         if (rdev->new_fw) {
6248                 const struct rlc_firmware_header_v1_0 *hdr =
6249                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6250                 const __le32 *fw_data = (const __le32 *)
6251                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6252
6253                 radeon_ucode_print_rlc_hdr(&hdr->header);
6254
6255                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6256                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6257                 for (i = 0; i < size; i++)
6258                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6259                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6260         } else {
6261                 const __be32 *fw_data;
6262
6263                 switch (rdev->family) {
6264                 case CHIP_BONAIRE:
6265                 case CHIP_HAWAII:
6266                 default:
6267                         size = BONAIRE_RLC_UCODE_SIZE;
6268                         break;
6269                 case CHIP_KAVERI:
6270                         size = KV_RLC_UCODE_SIZE;
6271                         break;
6272                 case CHIP_KABINI:
6273                         size = KB_RLC_UCODE_SIZE;
6274                         break;
6275                 case CHIP_MULLINS:
6276                         size = ML_RLC_UCODE_SIZE;
6277                         break;
6278                 }
6279
6280                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6281                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6282                 for (i = 0; i < size; i++)
6283                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6284                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6285         }
6286
6287         /* XXX - find out what chips support lbpw */
6288         cik_enable_lbpw(rdev, false);
6289
6290         if (rdev->family == CHIP_BONAIRE)
6291                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6292
6293         cik_rlc_start(rdev);
6294
6295         return 0;
6296 }
6297
6298 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6299 {
6300         u32 data, orig, tmp, tmp2;
6301
6302         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6303
6304         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6305                 cik_enable_gui_idle_interrupt(rdev, true);
6306
6307                 tmp = cik_halt_rlc(rdev);
6308
6309                 mutex_lock(&rdev->grbm_idx_mutex);
6310                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6311                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6312                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6313                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6314                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6315                 mutex_unlock(&rdev->grbm_idx_mutex);
6316
6317                 cik_update_rlc(rdev, tmp);
6318
6319                 data |= CGCG_EN | CGLS_EN;
6320         } else {
6321                 cik_enable_gui_idle_interrupt(rdev, false);
6322
6323                 RREG32(CB_CGTT_SCLK_CTRL);
6324                 RREG32(CB_CGTT_SCLK_CTRL);
6325                 RREG32(CB_CGTT_SCLK_CTRL);
6326                 RREG32(CB_CGTT_SCLK_CTRL);
6327
6328                 data &= ~(CGCG_EN | CGLS_EN);
6329         }
6330
6331         if (orig != data)
6332                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6333
6334 }
6335
6336 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6337 {
6338         u32 data, orig, tmp = 0;
6339
6340         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6341                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6342                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6343                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6344                                 data |= CP_MEM_LS_EN;
6345                                 if (orig != data)
6346                                         WREG32(CP_MEM_SLP_CNTL, data);
6347                         }
6348                 }
6349
6350                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6351                 data |= 0x00000001;
6352                 data &= 0xfffffffd;
6353                 if (orig != data)
6354                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6355
6356                 tmp = cik_halt_rlc(rdev);
6357
6358                 mutex_lock(&rdev->grbm_idx_mutex);
6359                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6360                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6361                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6362                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6363                 WREG32(RLC_SERDES_WR_CTRL, data);
6364                 mutex_unlock(&rdev->grbm_idx_mutex);
6365
6366                 cik_update_rlc(rdev, tmp);
6367
6368                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6369                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6370                         data &= ~SM_MODE_MASK;
6371                         data |= SM_MODE(0x2);
6372                         data |= SM_MODE_ENABLE;
6373                         data &= ~CGTS_OVERRIDE;
6374                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6375                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6376                                 data &= ~CGTS_LS_OVERRIDE;
6377                         data &= ~ON_MONITOR_ADD_MASK;
6378                         data |= ON_MONITOR_ADD_EN;
6379                         data |= ON_MONITOR_ADD(0x96);
6380                         if (orig != data)
6381                                 WREG32(CGTS_SM_CTRL_REG, data);
6382                 }
6383         } else {
6384                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6385                 data |= 0x00000003;
6386                 if (orig != data)
6387                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6388
6389                 data = RREG32(RLC_MEM_SLP_CNTL);
6390                 if (data & RLC_MEM_LS_EN) {
6391                         data &= ~RLC_MEM_LS_EN;
6392                         WREG32(RLC_MEM_SLP_CNTL, data);
6393                 }
6394
6395                 data = RREG32(CP_MEM_SLP_CNTL);
6396                 if (data & CP_MEM_LS_EN) {
6397                         data &= ~CP_MEM_LS_EN;
6398                         WREG32(CP_MEM_SLP_CNTL, data);
6399                 }
6400
6401                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6402                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6403                 if (orig != data)
6404                         WREG32(CGTS_SM_CTRL_REG, data);
6405
6406                 tmp = cik_halt_rlc(rdev);
6407
6408                 mutex_lock(&rdev->grbm_idx_mutex);
6409                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6410                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6411                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6412                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6413                 WREG32(RLC_SERDES_WR_CTRL, data);
6414                 mutex_unlock(&rdev->grbm_idx_mutex);
6415
6416                 cik_update_rlc(rdev, tmp);
6417         }
6418 }
6419
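/*
 * Memory controller, ATC and VM L2 clock gating control registers.
 * cik_enable_mc_mgcg() and cik_enable_mc_ls() toggle the CG/LS enable
 * bits in each of these registers as a group.
 */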
6420 static const u32 mc_cg_registers[] =
6421 {
6422         MC_HUB_MISC_HUB_CG,
6423         MC_HUB_MISC_SIP_CG,
6424         MC_HUB_MISC_VM_CG,
6425         MC_XPB_CLK_GAT,
6426         ATC_MISC_CG,
6427         MC_CITF_MISC_WR_CG,
6428         MC_CITF_MISC_RD_CG,
6429         MC_CITF_MISC_VM_CG,
6430         VM_L2_CG,
6431 };
6432
6433 static void cik_enable_mc_ls(struct radeon_device *rdev,
6434                              bool enable)
6435 {
6436         int i;
6437         u32 orig, data;
6438
6439         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6440                 orig = data = RREG32(mc_cg_registers[i]);
6441                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6442                         data |= MC_LS_ENABLE;
6443                 else
6444                         data &= ~MC_LS_ENABLE;
6445                 if (data != orig)
6446                         WREG32(mc_cg_registers[i], data);
6447         }
6448 }
6449
6450 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6451                                bool enable)
6452 {
6453         int i;
6454         u32 orig, data;
6455
6456         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6457                 orig = data = RREG32(mc_cg_registers[i]);
6458                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6459                         data |= MC_CG_ENABLE;
6460                 else
6461                         data &= ~MC_CG_ENABLE;
6462                 if (data != orig)
6463                         WREG32(mc_cg_registers[i], data);
6464         }
6465 }
6466
6467 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6468                                  bool enable)
6469 {
6470         u32 orig, data;
6471
6472         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6473                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6474                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6475         } else {
6476                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6477                 data |= 0xff000000;
6478                 if (data != orig)
6479                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6480
6481                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6482                 data |= 0xff000000;
6483                 if (data != orig)
6484                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6485         }
6486 }
6487
6488 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6489                                  bool enable)
6490 {
6491         u32 orig, data;
6492
6493         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6494                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6495                 data |= 0x100;
6496                 if (orig != data)
6497                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6498
6499                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6500                 data |= 0x100;
6501                 if (orig != data)
6502                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6503         } else {
6504                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6505                 data &= ~0x100;
6506                 if (orig != data)
6507                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6508
6509                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6510                 data &= ~0x100;
6511                 if (orig != data)
6512                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6513         }
6514 }
6515
6516 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6517                                 bool enable)
6518 {
6519         u32 orig, data;
6520
6521         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6522                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6523                 data = 0xfff;
6524                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6525
6526                 orig = data = RREG32(UVD_CGC_CTRL);
6527                 data |= DCM;
6528                 if (orig != data)
6529                         WREG32(UVD_CGC_CTRL, data);
6530         } else {
6531                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6532                 data &= ~0xfff;
6533                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6534
6535                 orig = data = RREG32(UVD_CGC_CTRL);
6536                 data &= ~DCM;
6537                 if (orig != data)
6538                         WREG32(UVD_CGC_CTRL, data);
6539         }
6540 }
6541
6542 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6543                                bool enable)
6544 {
6545         u32 orig, data;
6546
6547         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6548
6549         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6550                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6551                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6552         else
6553                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6554                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6555
6556         if (orig != data)
6557                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6558 }
6559
6560 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6561                                 bool enable)
6562 {
6563         u32 orig, data;
6564
6565         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6566
6567         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6568                 data &= ~CLOCK_GATING_DIS;
6569         else
6570                 data |= CLOCK_GATING_DIS;
6571
6572         if (orig != data)
6573                 WREG32(HDP_HOST_PATH_CNTL, data);
6574 }
6575
6576 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6577                               bool enable)
6578 {
6579         u32 orig, data;
6580
6581         orig = data = RREG32(HDP_MEM_POWER_LS);
6582
6583         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6584                 data |= HDP_LS_ENABLE;
6585         else
6586                 data &= ~HDP_LS_ENABLE;
6587
6588         if (orig != data)
6589                 WREG32(HDP_MEM_POWER_LS, data);
6590 }
6591
6592 void cik_update_cg(struct radeon_device *rdev,
6593                    u32 block, bool enable)
6594 {
6595
6596         if (block & RADEON_CG_BLOCK_GFX) {
6597                 cik_enable_gui_idle_interrupt(rdev, false);
6598                 /* order matters: enable MGCG before CGCG, disable in the reverse order */
6599                 if (enable) {
6600                         cik_enable_mgcg(rdev, true);
6601                         cik_enable_cgcg(rdev, true);
6602                 } else {
6603                         cik_enable_cgcg(rdev, false);
6604                         cik_enable_mgcg(rdev, false);
6605                 }
6606                 cik_enable_gui_idle_interrupt(rdev, true);
6607         }
6608
6609         if (block & RADEON_CG_BLOCK_MC) {
6610                 if (!(rdev->flags & RADEON_IS_IGP)) {
6611                         cik_enable_mc_mgcg(rdev, enable);
6612                         cik_enable_mc_ls(rdev, enable);
6613                 }
6614         }
6615
6616         if (block & RADEON_CG_BLOCK_SDMA) {
6617                 cik_enable_sdma_mgcg(rdev, enable);
6618                 cik_enable_sdma_mgls(rdev, enable);
6619         }
6620
6621         if (block & RADEON_CG_BLOCK_BIF) {
6622                 cik_enable_bif_mgls(rdev, enable);
6623         }
6624
6625         if (block & RADEON_CG_BLOCK_UVD) {
6626                 if (rdev->has_uvd)
6627                         cik_enable_uvd_mgcg(rdev, enable);
6628         }
6629
6630         if (block & RADEON_CG_BLOCK_HDP) {
6631                 cik_enable_hdp_mgcg(rdev, enable);
6632                 cik_enable_hdp_ls(rdev, enable);
6633         }
6634
6635         if (block & RADEON_CG_BLOCK_VCE) {
6636                 vce_v2_0_enable_mgcg(rdev, enable);
6637         }
6638 }
6639
6640 static void cik_init_cg(struct radeon_device *rdev)
6641 {
6642
6643         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6644
6645         if (rdev->has_uvd)
6646                 si_init_uvd_internal_cg(rdev);
6647
6648         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6649                              RADEON_CG_BLOCK_SDMA |
6650                              RADEON_CG_BLOCK_BIF |
6651                              RADEON_CG_BLOCK_UVD |
6652                              RADEON_CG_BLOCK_HDP), true);
6653 }
6654
6655 static void cik_fini_cg(struct radeon_device *rdev)
6656 {
6657         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6658                              RADEON_CG_BLOCK_SDMA |
6659                              RADEON_CG_BLOCK_BIF |
6660                              RADEON_CG_BLOCK_UVD |
6661                              RADEON_CG_BLOCK_HDP), false);
6662
6663         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6664 }
6665
6666 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6667                                           bool enable)
6668 {
6669         u32 data, orig;
6670
6671         orig = data = RREG32(RLC_PG_CNTL);
6672         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6673                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6674         else
6675                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6676         if (orig != data)
6677                 WREG32(RLC_PG_CNTL, data);
6678 }
6679
6680 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6681                                           bool enable)
6682 {
6683         u32 data, orig;
6684
6685         orig = data = RREG32(RLC_PG_CNTL);
6686         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6687                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6688         else
6689                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6690         if (orig != data)
6691                 WREG32(RLC_PG_CNTL, data);
6692 }
6693
6694 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6695 {
6696         u32 data, orig;
6697
6698         orig = data = RREG32(RLC_PG_CNTL);
6699         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6700                 data &= ~DISABLE_CP_PG;
6701         else
6702                 data |= DISABLE_CP_PG;
6703         if (orig != data)
6704                 WREG32(RLC_PG_CNTL, data);
6705 }
6706
6707 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6708 {
6709         u32 data, orig;
6710
6711         orig = data = RREG32(RLC_PG_CNTL);
6712         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6713                 data &= ~DISABLE_GDS_PG;
6714         else
6715                 data |= DISABLE_GDS_PG;
6716         if (orig != data)
6717                 WREG32(RLC_PG_CNTL, data);
6718 }
6719
6720 #define CP_ME_TABLE_SIZE    96
6721 #define CP_ME_TABLE_OFFSET  2048
6722 #define CP_MEC_TABLE_OFFSET 4096
6723
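/**
 * cik_init_cp_pg_table - populate the CP powergating jump table
 *
 * @rdev: radeon_device pointer
 *
 * Copy the RLC jump table out of each CP ucode image (CE, PFP, ME, MEC,
 * plus MEC2 on KAVERI) into the cp_table buffer.  The GPU address of
 * this buffer is later programmed into RLC_CP_TABLE_RESTORE by
 * cik_init_gfx_cgpg(), which the RLC uses when restoring CP state
 * after powergating (CIK).
 */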
6724 void cik_init_cp_pg_table(struct radeon_device *rdev)
6725 {
6726         volatile u32 *dst_ptr;
6727         int me, i, max_me = 4;
6728         u32 bo_offset = 0;
6729         u32 table_offset, table_size;
6730
6731         if (rdev->family == CHIP_KAVERI)
6732                 max_me = 5;
6733
6734         if (rdev->rlc.cp_table_ptr == NULL)
6735                 return;
6736
6737         /* write the cp table buffer */
6738         dst_ptr = rdev->rlc.cp_table_ptr;
6739         for (me = 0; me < max_me; me++) {
6740                 if (rdev->new_fw) {
6741                         const __le32 *fw_data;
6742                         const struct gfx_firmware_header_v1_0 *hdr;
6743
6744                         if (me == 0) {
6745                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6746                                 fw_data = (const __le32 *)
6747                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6748                                 table_offset = le32_to_cpu(hdr->jt_offset);
6749                                 table_size = le32_to_cpu(hdr->jt_size);
6750                         } else if (me == 1) {
6751                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6752                                 fw_data = (const __le32 *)
6753                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6754                                 table_offset = le32_to_cpu(hdr->jt_offset);
6755                                 table_size = le32_to_cpu(hdr->jt_size);
6756                         } else if (me == 2) {
6757                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6758                                 fw_data = (const __le32 *)
6759                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6760                                 table_offset = le32_to_cpu(hdr->jt_offset);
6761                                 table_size = le32_to_cpu(hdr->jt_size);
6762                         } else if (me == 3) {
6763                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6764                                 fw_data = (const __le32 *)
6765                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6766                                 table_offset = le32_to_cpu(hdr->jt_offset);
6767                                 table_size = le32_to_cpu(hdr->jt_size);
6768                         } else {
6769                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6770                                 fw_data = (const __le32 *)
6771                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6772                                 table_offset = le32_to_cpu(hdr->jt_offset);
6773                                 table_size = le32_to_cpu(hdr->jt_size);
6774                         }
6775
6776                         for (i = 0; i < table_size; i++) {
6777                                 dst_ptr[bo_offset + i] =
6778                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6779                         }
6780                         bo_offset += table_size;
6781                 } else {
6782                         const __be32 *fw_data;
6783                         table_size = CP_ME_TABLE_SIZE;
6784
6785                         if (me == 0) {
6786                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6787                                 table_offset = CP_ME_TABLE_OFFSET;
6788                         } else if (me == 1) {
6789                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6790                                 table_offset = CP_ME_TABLE_OFFSET;
6791                         } else if (me == 2) {
6792                                 fw_data = (const __be32 *)rdev->me_fw->data;
6793                                 table_offset = CP_ME_TABLE_OFFSET;
6794                         } else {
6795                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6796                                 table_offset = CP_MEC_TABLE_OFFSET;
6797                         }
6798
6799                         for (i = 0; i < table_size; i++) {
6800                                 dst_ptr[bo_offset + i] =
6801                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6802                         }
6803                         bo_offset += table_size;
6804                 }
6805         }
6806 }
6807
6808 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6809                                 bool enable)
6810 {
6811         u32 data, orig;
6812
6813         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6814                 orig = data = RREG32(RLC_PG_CNTL);
6815                 data |= GFX_PG_ENABLE;
6816                 if (orig != data)
6817                         WREG32(RLC_PG_CNTL, data);
6818
6819                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6820                 data |= AUTO_PG_EN;
6821                 if (orig != data)
6822                         WREG32(RLC_AUTO_PG_CTRL, data);
6823         } else {
6824                 orig = data = RREG32(RLC_PG_CNTL);
6825                 data &= ~GFX_PG_ENABLE;
6826                 if (orig != data)
6827                         WREG32(RLC_PG_CNTL, data);
6828
6829                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6830                 data &= ~AUTO_PG_EN;
6831                 if (orig != data)
6832                         WREG32(RLC_AUTO_PG_CTRL, data);
6833
6834                 data = RREG32(DB_RENDER_CONTROL);
6835         }
6836 }
6837
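/*
 * Returns a bitmap of the compute units in the given SE/SH that are not
 * disabled in either CC_GC_SHADER_ARRAY_CONFIG or
 * GC_USER_SHADER_ARRAY_CONFIG, limited to max_cu_per_sh bits.
 */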
6838 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6839 {
6840         u32 mask = 0, tmp, tmp1;
6841         int i;
6842
6843         mutex_lock(&rdev->grbm_idx_mutex);
6844         cik_select_se_sh(rdev, se, sh);
6845         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6846         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6847         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6848         mutex_unlock(&rdev->grbm_idx_mutex);
6849
6850         tmp &= 0xffff0000;
6851
6852         tmp |= tmp1;
6853         tmp >>= 16;
6854
6855         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6856                 mask <<= 1;
6857                 mask |= 1;
6858         }
6859
6860         return (~tmp) & mask;
6861 }
6862
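/*
 * Build the "always on" CU bitmap (at most two CUs per SH) for
 * RLC_PG_AO_CU_MASK and program the total active CU count into
 * RLC_MAX_PG_CU.
 */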
6863 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6864 {
6865         u32 i, j, k, active_cu_number = 0;
6866         u32 mask, counter, cu_bitmap;
6867         u32 tmp = 0;
6868
6869         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6870                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6871                         mask = 1;
6872                         cu_bitmap = 0;
6873                         counter = 0;
6874                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6875                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6876                                         if (counter < 2)
6877                                                 cu_bitmap |= mask;
6878                                         counter++;
6879                                 }
6880                                 mask <<= 1;
6881                         }
6882
6883                         active_cu_number += counter;
6884                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6885                 }
6886         }
6887
6888         WREG32(RLC_PG_AO_CU_MASK, tmp);
6889
6890         tmp = RREG32(RLC_MAX_PG_CU);
6891         tmp &= ~MAX_PU_CU_MASK;
6892         tmp |= MAX_PU_CU(active_cu_number);
6893         WREG32(RLC_MAX_PG_CU, tmp);
6894 }
6895
6896 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6897                                        bool enable)
6898 {
6899         u32 data, orig;
6900
6901         orig = data = RREG32(RLC_PG_CNTL);
6902         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6903                 data |= STATIC_PER_CU_PG_ENABLE;
6904         else
6905                 data &= ~STATIC_PER_CU_PG_ENABLE;
6906         if (orig != data)
6907                 WREG32(RLC_PG_CNTL, data);
6908 }
6909
6910 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6911                                         bool enable)
6912 {
6913         u32 data, orig;
6914
6915         orig = data = RREG32(RLC_PG_CNTL);
6916         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6917                 data |= DYN_PER_CU_PG_ENABLE;
6918         else
6919                 data &= ~DYN_PER_CU_PG_ENABLE;
6920         if (orig != data)
6921                 WREG32(RLC_PG_CNTL, data);
6922 }
6923
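/* offsets into the RLC GPM scratch area used by cik_init_gfx_cgpg() */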
6924 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6925 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6926
6927 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6928 {
6929         u32 data, orig;
6930         u32 i;
6931
6932         if (rdev->rlc.cs_data) {
6933                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6934                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6935                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6936                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6937         } else {
6938                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6939                 for (i = 0; i < 3; i++)
6940                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6941         }
6942         if (rdev->rlc.reg_list) {
6943                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6944                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6945                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6946         }
6947
6948         orig = data = RREG32(RLC_PG_CNTL);
6949         data |= GFX_PG_SRC;
6950         if (orig != data)
6951                 WREG32(RLC_PG_CNTL, data);
6952
6953         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6954         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6955
6956         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6957         data &= ~IDLE_POLL_COUNT_MASK;
6958         data |= IDLE_POLL_COUNT(0x60);
6959         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6960
6961         data = 0x10101010;
6962         WREG32(RLC_PG_DELAY, data);
6963
6964         data = RREG32(RLC_PG_DELAY_2);
6965         data &= ~0xff;
6966         data |= 0x3;
6967         WREG32(RLC_PG_DELAY_2, data);
6968
6969         data = RREG32(RLC_AUTO_PG_CTRL);
6970         data &= ~GRBM_REG_SGIT_MASK;
6971         data |= GRBM_REG_SGIT(0x700);
6972         WREG32(RLC_AUTO_PG_CTRL, data);
6973
6974 }
6975
6976 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6977 {
6978         cik_enable_gfx_cgpg(rdev, enable);
6979         cik_enable_gfx_static_mgpg(rdev, enable);
6980         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6981 }
6982
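/*
 * Returns the size, in dwords, of the clear state buffer emitted by
 * cik_get_csb_buffer(); the two functions must be kept in sync.
 */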
6983 u32 cik_get_csb_size(struct radeon_device *rdev)
6984 {
6985         u32 count = 0;
6986         const struct cs_section_def *sect = NULL;
6987         const struct cs_extent_def *ext = NULL;
6988
6989         if (rdev->rlc.cs_data == NULL)
6990                 return 0;
6991
6992         /* begin clear state */
6993         count += 2;
6994         /* context control state */
6995         count += 3;
6996
6997         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6998                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6999                         if (sect->id == SECT_CONTEXT)
7000                                 count += 2 + ext->reg_count;
7001                         else
7002                                 return 0;
7003                 }
7004         }
7005         /* pa_sc_raster_config/pa_sc_raster_config1 */
7006         count += 4;
7007         /* end clear state */
7008         count += 2;
7009         /* clear state */
7010         count += 2;
7011
7012         return count;
7013 }
7014
7015 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7016 {
7017         u32 count = 0, i;
7018         const struct cs_section_def *sect = NULL;
7019         const struct cs_extent_def *ext = NULL;
7020
7021         if (rdev->rlc.cs_data == NULL)
7022                 return;
7023         if (buffer == NULL)
7024                 return;
7025
7026         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7027         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7028
7029         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7030         buffer[count++] = cpu_to_le32(0x80000000);
7031         buffer[count++] = cpu_to_le32(0x80000000);
7032
7033         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7034                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7035                         if (sect->id == SECT_CONTEXT) {
7036                                 buffer[count++] =
7037                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7038                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7039                                 for (i = 0; i < ext->reg_count; i++)
7040                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7041                         } else {
7042                                 return;
7043                         }
7044                 }
7045         }
7046
7047         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7048         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7049         switch (rdev->family) {
7050         case CHIP_BONAIRE:
7051                 buffer[count++] = cpu_to_le32(0x16000012);
7052                 buffer[count++] = cpu_to_le32(0x00000000);
7053                 break;
7054         case CHIP_KAVERI:
7055                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7056                 buffer[count++] = cpu_to_le32(0x00000000);
7057                 break;
7058         case CHIP_KABINI:
7059         case CHIP_MULLINS:
7060                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7061                 buffer[count++] = cpu_to_le32(0x00000000);
7062                 break;
7063         case CHIP_HAWAII:
7064                 buffer[count++] = cpu_to_le32(0x3a00161a);
7065                 buffer[count++] = cpu_to_le32(0x0000002e);
7066                 break;
7067         default:
7068                 buffer[count++] = cpu_to_le32(0x00000000);
7069                 buffer[count++] = cpu_to_le32(0x00000000);
7070                 break;
7071         }
7072
7073         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7074         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7075
7076         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7077         buffer[count++] = cpu_to_le32(0);
7078 }
7079
7080 static void cik_init_pg(struct radeon_device *rdev)
7081 {
7082         if (rdev->pg_flags) {
7083                 cik_enable_sck_slowdown_on_pu(rdev, true);
7084                 cik_enable_sck_slowdown_on_pd(rdev, true);
7085                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7086                         cik_init_gfx_cgpg(rdev);
7087                         cik_enable_cp_pg(rdev, true);
7088                         cik_enable_gds_pg(rdev, true);
7089                 }
7090                 cik_init_ao_cu_mask(rdev);
7091                 cik_update_gfx_pg(rdev, true);
7092         }
7093 }
7094
7095 static void cik_fini_pg(struct radeon_device *rdev)
7096 {
7097         if (rdev->pg_flags) {
7098                 cik_update_gfx_pg(rdev, false);
7099                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7100                         cik_enable_cp_pg(rdev, false);
7101                         cik_enable_gds_pg(rdev, false);
7102                 }
7103         }
7104 }
7105
7106 /*
7107  * Interrupts
7108  * Starting with r6xx, interrupts are handled via a ring buffer.
7109  * Ring buffers are areas of GPU accessible memory that the GPU
7110  * writes interrupt vectors into and the host reads vectors out of.
7111  * There is a rptr (read pointer) that determines where the
7112  * host is currently reading, and a wptr (write pointer)
7113  * which determines where the GPU has written.  When the
7114  * pointers are equal, the ring is idle.  When the GPU
7115  * writes vectors to the ring buffer, it increments the
7116  * wptr.  When there is an interrupt, the host then starts
7117  * fetching vectors and processing them until the pointers are
7118  * equal again, at which point it updates the rptr.
7119  */
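
/*
 * A minimal, illustrative sketch of the host-side drain loop described
 * above.  The names (example_ih_ring, example_handle_vector) are
 * hypothetical and the block is compiled out; the driver's real
 * implementation is cik_irq_process() further down in this file.
 */
#if 0
struct example_ih_ring {
        u32 rptr;               /* next byte offset the host will read */
        u32 ring_size;          /* ring size in bytes, power of two */
        volatile u32 *wptr;     /* write pointer, updated by the GPU */
        u32 *ring;              /* CPU view of the ring buffer */
};

static void example_handle_vector(const u32 *dw)
{
        /* dw[0..3] hold the source id, source data, ring id, etc. */
}

static void example_ih_drain(struct example_ih_ring *ih)
{
        u32 wptr = *ih->wptr;

        while (ih->rptr != wptr) {
                /* each vector is 4 dwords (16 bytes) */
                example_handle_vector(&ih->ring[ih->rptr / 4]);
                ih->rptr = (ih->rptr + 16) & (ih->ring_size - 1);
        }
        /* tell the GPU how far we have read */
        WREG32(IH_RB_RPTR, ih->rptr);
}
#endif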
7120
7121 /**
7122  * cik_enable_interrupts - Enable the interrupt ring buffer
7123  *
7124  * @rdev: radeon_device pointer
7125  *
7126  * Enable the interrupt ring buffer (CIK).
7127  */
7128 static void cik_enable_interrupts(struct radeon_device *rdev)
7129 {
7130         u32 ih_cntl = RREG32(IH_CNTL);
7131         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7132
7133         ih_cntl |= ENABLE_INTR;
7134         ih_rb_cntl |= IH_RB_ENABLE;
7135         WREG32(IH_CNTL, ih_cntl);
7136         WREG32(IH_RB_CNTL, ih_rb_cntl);
7137         rdev->ih.enabled = true;
7138 }
7139
7140 /**
7141  * cik_disable_interrupts - Disable the interrupt ring buffer
7142  *
7143  * @rdev: radeon_device pointer
7144  *
7145  * Disable the interrupt ring buffer (CIK).
7146  */
7147 static void cik_disable_interrupts(struct radeon_device *rdev)
7148 {
7149         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7150         u32 ih_cntl = RREG32(IH_CNTL);
7151
7152         ih_rb_cntl &= ~IH_RB_ENABLE;
7153         ih_cntl &= ~ENABLE_INTR;
7154         WREG32(IH_RB_CNTL, ih_rb_cntl);
7155         WREG32(IH_CNTL, ih_cntl);
7156         /* set rptr, wptr to 0 */
7157         WREG32(IH_RB_RPTR, 0);
7158         WREG32(IH_RB_WPTR, 0);
7159         rdev->ih.enabled = false;
7160         rdev->ih.rptr = 0;
7161 }
7162
7163 /**
7164  * cik_disable_interrupt_state - Disable all interrupt sources
7165  *
7166  * @rdev: radeon_device pointer
7167  *
7168  * Clear all interrupt enable bits used by the driver (CIK).
7169  */
7170 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7171 {
7172         u32 tmp;
7173
7174         /* gfx ring */
7175         tmp = RREG32(CP_INT_CNTL_RING0) &
7176                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7177         WREG32(CP_INT_CNTL_RING0, tmp);
7178         /* sdma */
7179         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7180         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7181         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7182         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7183         /* compute queues */
7184         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7185         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7186         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7187         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7188         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7189         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7190         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7191         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7192         /* grbm */
7193         WREG32(GRBM_INT_CNTL, 0);
7194         /* vline/vblank, etc. */
7195         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7196         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7197         if (rdev->num_crtc >= 4) {
7198                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7199                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7200         }
7201         if (rdev->num_crtc >= 6) {
7202                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7203                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7204         }
7205         /* pflip */
7206         if (rdev->num_crtc >= 2) {
7207                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7208                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7209         }
7210         if (rdev->num_crtc >= 4) {
7211                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7212                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7213         }
7214         if (rdev->num_crtc >= 6) {
7215                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7216                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7217         }
7218
7219         /* dac hotplug */
7220         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7221
7222         /* digital hotplug */
7223         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7224         WREG32(DC_HPD1_INT_CONTROL, tmp);
7225         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7226         WREG32(DC_HPD2_INT_CONTROL, tmp);
7227         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7228         WREG32(DC_HPD3_INT_CONTROL, tmp);
7229         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7230         WREG32(DC_HPD4_INT_CONTROL, tmp);
7231         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7232         WREG32(DC_HPD5_INT_CONTROL, tmp);
7233         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7234         WREG32(DC_HPD6_INT_CONTROL, tmp);
7235
7236 }
7237
7238 /**
7239  * cik_irq_init - init and enable the interrupt ring
7240  *
7241  * @rdev: radeon_device pointer
7242  *
7243  * Allocate a ring buffer for the interrupt controller,
7244  * disable interrupts, initialize the RLC, then set up
7245  * and enable the IH ring buffer (CIK).
7246  * Called at device load and resume.
7247  * Returns 0 for success, errors for failure.
7248  */
7249 static int cik_irq_init(struct radeon_device *rdev)
7250 {
7251         int ret = 0;
7252         int rb_bufsz;
7253         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7254
7255         /* allocate ring */
7256         ret = r600_ih_ring_alloc(rdev);
7257         if (ret)
7258                 return ret;
7259
7260         /* disable irqs */
7261         cik_disable_interrupts(rdev);
7262
7263         /* init rlc */
7264         ret = cik_rlc_resume(rdev);
7265         if (ret) {
7266                 r600_ih_ring_fini(rdev);
7267                 return ret;
7268         }
7269
7270         /* setup interrupt control */
7271         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7272         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7273         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7274         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7275          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7276          */
7277         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7278         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7279         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7280         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7281
7282         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7283         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7284
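        /*
         * the ring size field in IH_RB_CNTL starts at bit 1 and takes the
         * log2 of the ring size in dwords, hence the shift below
         */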
7285         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7286                       IH_WPTR_OVERFLOW_CLEAR |
7287                       (rb_bufsz << 1));
7288
7289         if (rdev->wb.enabled)
7290                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7291
7292         /* set the writeback address whether it's enabled or not */
7293         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7294         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7295
7296         WREG32(IH_RB_CNTL, ih_rb_cntl);
7297
7298         /* set rptr, wptr to 0 */
7299         WREG32(IH_RB_RPTR, 0);
7300         WREG32(IH_RB_WPTR, 0);
7301
7302         /* Default settings for IH_CNTL (disabled at first) */
7303         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7304         /* RPTR_REARM only works if msi's are enabled */
7305         if (rdev->msi_enabled)
7306                 ih_cntl |= RPTR_REARM;
7307         WREG32(IH_CNTL, ih_cntl);
7308
7309         /* force the active interrupt state to all disabled */
7310         cik_disable_interrupt_state(rdev);
7311
7312         pci_set_master(rdev->pdev);
7313
7314         /* enable irqs */
7315         cik_enable_interrupts(rdev);
7316
7317         return ret;
7318 }
7319
7320 /**
7321  * cik_irq_set - enable/disable interrupt sources
7322  *
7323  * @rdev: radeon_device pointer
7324  *
7325  * Enable interrupt sources on the GPU (vblanks, hpd,
7326  * etc.) (CIK).
7327  * Returns 0 for success, errors for failure.
7328  */
7329 int cik_irq_set(struct radeon_device *rdev)
7330 {
7331         u32 cp_int_cntl;
7332         u32 cp_m1p0;
7333         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7334         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7335         u32 grbm_int_cntl = 0;
7336         u32 dma_cntl, dma_cntl1;
7337         u32 thermal_int;
7338
7339         if (!rdev->irq.installed) {
7340                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7341                 return -EINVAL;
7342         }
7343         /* don't enable anything if the ih is disabled */
7344         if (!rdev->ih.enabled) {
7345                 cik_disable_interrupts(rdev);
7346                 /* force the active interrupt state to all disabled */
7347                 cik_disable_interrupt_state(rdev);
7348                 return 0;
7349         }
7350
7351         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7352                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7353         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7354
7355         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7356         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7357         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7358         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7359         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7360         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7361
7362         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7363         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7364
7365         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7366
7367         if (rdev->flags & RADEON_IS_IGP)
7368                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7369                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
7370         else
7371                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7372                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7373
7374         /* enable CP interrupts on all rings */
7375         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7376                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7377                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7378         }
7379         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7380                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7381                 DRM_DEBUG("cik_irq_set: sw int cp1\n");
7382                 if (ring->me == 1) {
7383                         switch (ring->pipe) {
7384                         case 0:
7385                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7386                                 break;
7387                         default:
7388                                 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7389                                 break;
7390                         }
7391                 } else {
7392                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7393                 }
7394         }
7395         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7396                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7397                 DRM_DEBUG("cik_irq_set: sw int cp2\n");
7398                 if (ring->me == 1) {
7399                         switch (ring->pipe) {
7400                         case 0:
7401                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7402                                 break;
7403                         default:
7404                                 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7405                                 break;
7406                         }
7407                 } else {
7408                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7409                 }
7410         }
7411
7412         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7413                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7414                 dma_cntl |= TRAP_ENABLE;
7415         }
7416
7417         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7418                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7419                 dma_cntl1 |= TRAP_ENABLE;
7420         }
7421
7422         if (rdev->irq.crtc_vblank_int[0] ||
7423             atomic_read(&rdev->irq.pflip[0])) {
7424                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7425                 crtc1 |= VBLANK_INTERRUPT_MASK;
7426         }
7427         if (rdev->irq.crtc_vblank_int[1] ||
7428             atomic_read(&rdev->irq.pflip[1])) {
7429                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7430                 crtc2 |= VBLANK_INTERRUPT_MASK;
7431         }
7432         if (rdev->irq.crtc_vblank_int[2] ||
7433             atomic_read(&rdev->irq.pflip[2])) {
7434                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7435                 crtc3 |= VBLANK_INTERRUPT_MASK;
7436         }
7437         if (rdev->irq.crtc_vblank_int[3] ||
7438             atomic_read(&rdev->irq.pflip[3])) {
7439                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7440                 crtc4 |= VBLANK_INTERRUPT_MASK;
7441         }
7442         if (rdev->irq.crtc_vblank_int[4] ||
7443             atomic_read(&rdev->irq.pflip[4])) {
7444                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7445                 crtc5 |= VBLANK_INTERRUPT_MASK;
7446         }
7447         if (rdev->irq.crtc_vblank_int[5] ||
7448             atomic_read(&rdev->irq.pflip[5])) {
7449                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7450                 crtc6 |= VBLANK_INTERRUPT_MASK;
7451         }
7452         if (rdev->irq.hpd[0]) {
7453                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7454                 hpd1 |= DC_HPDx_INT_EN;
7455         }
7456         if (rdev->irq.hpd[1]) {
7457                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7458                 hpd2 |= DC_HPDx_INT_EN;
7459         }
7460         if (rdev->irq.hpd[2]) {
7461                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7462                 hpd3 |= DC_HPDx_INT_EN;
7463         }
7464         if (rdev->irq.hpd[3]) {
7465                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7466                 hpd4 |= DC_HPDx_INT_EN;
7467         }
7468         if (rdev->irq.hpd[4]) {
7469                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7470                 hpd5 |= DC_HPDx_INT_EN;
7471         }
7472         if (rdev->irq.hpd[5]) {
7473                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7474                 hpd6 |= DC_HPDx_INT_EN;
7475         }
7476
7477         if (rdev->irq.dpm_thermal) {
7478                 DRM_DEBUG("dpm thermal\n");
7479                 if (rdev->flags & RADEON_IS_IGP)
7480                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7481                 else
7482                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7483         }
7484
7485         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7486
7487         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7488         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7489
7490         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7491
7492         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7493
7494         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7495         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7496         if (rdev->num_crtc >= 4) {
7497                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7498                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7499         }
7500         if (rdev->num_crtc >= 6) {
7501                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7502                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7503         }
7504
7505         if (rdev->num_crtc >= 2) {
7506                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7507                        GRPH_PFLIP_INT_MASK);
7508                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7509                        GRPH_PFLIP_INT_MASK);
7510         }
7511         if (rdev->num_crtc >= 4) {
7512                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7513                        GRPH_PFLIP_INT_MASK);
7514                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7515                        GRPH_PFLIP_INT_MASK);
7516         }
7517         if (rdev->num_crtc >= 6) {
7518                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7519                        GRPH_PFLIP_INT_MASK);
7520                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7521                        GRPH_PFLIP_INT_MASK);
7522         }
7523
7524         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7525         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7526         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7527         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7528         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7529         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7530
7531         if (rdev->flags & RADEON_IS_IGP)
7532                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7533         else
7534                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7535
7536         return 0;
7537 }
7538
7539 /**
7540  * cik_irq_ack - ack interrupt sources
7541  *
7542  * @rdev: radeon_device pointer
7543  *
7544  * Ack interrupt sources on the GPU (vblanks, hpd,
7545  * etc.) (CIK).  Certain interrupt sources are sw
7546  * generated and do not require an explicit ack.
7547  */
7548 static inline void cik_irq_ack(struct radeon_device *rdev)
7549 {
7550         u32 tmp;
7551
7552         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7553         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7554         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7555         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7556         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7557         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7558         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7559
7560         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7561                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7562         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7563                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7564         if (rdev->num_crtc >= 4) {
7565                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7566                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7567                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7568                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7569         }
7570         if (rdev->num_crtc >= 6) {
7571                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7572                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7573                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7574                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7575         }
7576
7577         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7578                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7579                        GRPH_PFLIP_INT_CLEAR);
7580         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7581                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7582                        GRPH_PFLIP_INT_CLEAR);
7583         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7584                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7585         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7586                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7587         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7588                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7589         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7590                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7591
7592         if (rdev->num_crtc >= 4) {
7593                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7594                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7595                                GRPH_PFLIP_INT_CLEAR);
7596                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7597                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7598                                GRPH_PFLIP_INT_CLEAR);
7599                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7600                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7601                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7602                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7603                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7604                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7605                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7606                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7607         }
7608
7609         if (rdev->num_crtc >= 6) {
7610                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7611                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7612                                GRPH_PFLIP_INT_CLEAR);
7613                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7614                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7615                                GRPH_PFLIP_INT_CLEAR);
7616                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7617                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7618                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7619                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7620                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7621                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7622                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7623                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7624         }
7625
7626         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7627                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7628                 tmp |= DC_HPDx_INT_ACK;
7629                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7630         }
7631         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7632                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7633                 tmp |= DC_HPDx_INT_ACK;
7634                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7635         }
7636         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7637                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7638                 tmp |= DC_HPDx_INT_ACK;
7639                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7640         }
7641         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7642                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7643                 tmp |= DC_HPDx_INT_ACK;
7644                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7645         }
7646         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7647                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7648                 tmp |= DC_HPDx_INT_ACK;
7649                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7650         }
7651         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7652                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7653                 tmp |= DC_HPDx_INT_ACK;
7654                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7655         }
7656 }
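/*
 * Illustrative sketch (not wired into the driver): each of the six HPD
 * acks in cik_irq_ack() above is the same read-modify-write of one
 * DC_HPDx_INT_CONTROL register.  The helper name is made up for the
 * example; it only uses RREG32/WREG32 and DC_HPDx_INT_ACK exactly as
 * the per-pin blocks above do.
 */
static inline void cik_ack_one_hpd_example(struct radeon_device *rdev,
                                           u32 hpd_int_control_reg)
{
        u32 tmp = RREG32(hpd_int_control_reg);

        tmp |= DC_HPDx_INT_ACK;
        WREG32(hpd_int_control_reg, tmp);
}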
7657
7658 /**
7659  * cik_irq_disable - disable interrupts
7660  *
7661  * @rdev: radeon_device pointer
7662  *
7663  * Disable interrupts on the hw (CIK).
7664  */
7665 static void cik_irq_disable(struct radeon_device *rdev)
7666 {
7667         cik_disable_interrupts(rdev);
7668         /* Wait and acknowledge irq */
7669         mdelay(1);
7670         cik_irq_ack(rdev);
7671         cik_disable_interrupt_state(rdev);
7672 }
7673
7674 /**
7675  * cik_irq_suspend - disable interrupts for suspend
7676  *
7677  * @rdev: radeon_device pointer
7678  *
7679  * Disable interrupts and stop the RLC (CIK).
7680  * Used for suspend.
7681  */
7682 static void cik_irq_suspend(struct radeon_device *rdev)
7683 {
7684         cik_irq_disable(rdev);
7685         cik_rlc_stop(rdev);
7686 }
7687
7688 /**
7689  * cik_irq_fini - tear down interrupt support
7690  *
7691  * @rdev: radeon_device pointer
7692  *
7693  * Disable interrupts on the hw and free the IH ring
7694  * buffer (CIK).
7695  * Used for driver unload.
7696  */
7697 static void cik_irq_fini(struct radeon_device *rdev)
7698 {
7699         cik_irq_suspend(rdev);
7700         r600_ih_ring_fini(rdev);
7701 }
7702
7703 /**
7704  * cik_get_ih_wptr - get the IH ring buffer wptr
7705  *
7706  * @rdev: radeon_device pointer
7707  *
7708  * Get the IH ring buffer wptr from either the register
7709  * or the writeback memory buffer (CIK).  Also check for
7710  * ring buffer overflow and deal with it.
7711  * Used by cik_irq_process().
7712  * Returns the value of the wptr.
7713  */
7714 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7715 {
7716         u32 wptr, tmp;
7717
7718         if (rdev->wb.enabled)
7719                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7720         else
7721                 wptr = RREG32(IH_RB_WPTR);
7722
7723         if (wptr & RB_OVERFLOW) {
7724                 wptr &= ~RB_OVERFLOW;
7725                 /* When a ring buffer overflow happens, start parsing interrupts
7726                  * from the last vector that was not overwritten (wptr + 16).
7727                  * Hopefully this should allow us to catch up.
7728                  */
7729                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7730                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7731                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7732                 tmp = RREG32(IH_RB_CNTL);
7733                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7734                 WREG32(IH_RB_CNTL, tmp);
7735         }
7736         return (wptr & rdev->ih.ptr_mask);
7737 }
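/*
 * Illustrative sketch (not used by the driver): the pointer arithmetic
 * used above and in cik_irq_process().  IV ring entries are 16 bytes,
 * so the read pointer always advances in 16-byte steps and wraps via
 * ptr_mask; on overflow, parsing restarts one entry past wptr, skipping
 * the slot that is about to be overwritten.  The function names are
 * made up for the example.
 */
static inline u32 cik_ih_next_rptr_example(u32 rptr, u32 ptr_mask)
{
        return (rptr + 16) & ptr_mask;  /* step to the next 16-byte entry */
}

static inline u32 cik_ih_overflow_rptr_example(u32 wptr, u32 ptr_mask)
{
        return (wptr + 16) & ptr_mask;  /* last entry not yet overwritten */
}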
7738
7739 /*        CIK IV Ring
7740  * Each IV ring entry is 128 bits:
7741  * [7:0]    - interrupt source id
7742  * [31:8]   - reserved
7743  * [59:32]  - interrupt source data
7744  * [63:60]  - reserved
7745  * [71:64]  - RINGID
7746  *            CP:
7747  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7748  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7749  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7750  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7751  *            PIPE_ID - ME0 0=3D
7752  *                    - ME1&2 compute dispatcher (4 pipes each)
7753  *            SDMA:
7754  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7755  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7756  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7757  * [79:72]  - VMID
7758  * [95:80]  - PASID
7759  * [127:96] - reserved
7760  */
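/*
 * Illustrative sketch (not wired into the driver): decoding one IV ring
 * entry into the fields documented above.  It mirrors the masks and
 * shifts used in cik_irq_process() below; the struct and function names
 * are made up for the example, and the CP ME/PIPE/QUEUE split of RINGID
 * follows the layout assumed by the EOP handling below.
 */
struct cik_iv_entry_example {
        u32 src_id;     /* [7:0]   - interrupt source id */
        u32 src_data;   /* [59:32] - interrupt source data (28 bits used) */
        u32 ring_id;    /* [71:64] - RINGID */
        u32 vmid;       /* [79:72] - VMID */
        u32 pasid;      /* [95:80] - PASID */
};

static inline void cik_decode_iv_entry_example(const u32 *dw,
                                               struct cik_iv_entry_example *iv)
{
        iv->src_id   = le32_to_cpu(dw[0]) & 0xff;
        iv->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
        iv->ring_id  = le32_to_cpu(dw[2]) & 0xff;
        iv->vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;
        iv->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
        /* for CP sources, RINGID further splits into:
         *   me_id = (ring_id & 0x60) >> 5
         *   pipe_id = (ring_id & 0x18) >> 3
         *   queue_id = ring_id & 0x7
         */
}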
7761 /**
7762  * cik_irq_process - interrupt handler
7763  *
7764  * @rdev: radeon_device pointer
7765  *
7766  * Interrupt handler (CIK).  Walk the IH ring,
7767  * ack interrupts and schedule work to handle
7768  * interrupt events.
7769  * Returns irq process return code.
7770  */
7771 int cik_irq_process(struct radeon_device *rdev)
7772 {
7773         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7774         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7775         u32 wptr;
7776         u32 rptr;
7777         u32 src_id, src_data, ring_id;
7778         u8 me_id, pipe_id, queue_id;
7779         u32 ring_index;
7780         bool queue_hotplug = false;
7781         bool queue_reset = false;
7782         u32 addr, status, mc_client;
7783         bool queue_thermal = false;
7784
7785         if (!rdev->ih.enabled || rdev->shutdown)
7786                 return IRQ_NONE;
7787
7788         wptr = cik_get_ih_wptr(rdev);
7789
7790 restart_ih:
7791         /* is somebody else already processing irqs? */
7792         if (atomic_xchg(&rdev->ih.lock, 1))
7793                 return IRQ_NONE;
7794
7795         rptr = rdev->ih.rptr;
7796         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7797
7798         /* Order reading of wptr vs. reading of IH ring data */
7799         rmb();
7800
7801         /* display interrupts */
7802         cik_irq_ack(rdev);
7803
7804         while (rptr != wptr) {
7805                 /* wptr/rptr are in bytes! */
7806                 ring_index = rptr / 4;
7807
7808                 radeon_kfd_interrupt(rdev,
7809                                 (const void *) &rdev->ih.ring[ring_index]);
7810
7811                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7812                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7813                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7814
7815                 switch (src_id) {
7816                 case 1: /* D1 vblank/vline */
7817                         switch (src_data) {
7818                         case 0: /* D1 vblank */
7819                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7820                                         if (rdev->irq.crtc_vblank_int[0]) {
7821                                                 drm_handle_vblank(rdev->ddev, 0);
7822                                                 rdev->pm.vblank_sync = true;
7823                                                 wake_up(&rdev->irq.vblank_queue);
7824                                         }
7825                                         if (atomic_read(&rdev->irq.pflip[0]))
7826                                                 radeon_crtc_handle_vblank(rdev, 0);
7827                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7828                                         DRM_DEBUG("IH: D1 vblank\n");
7829                                 }
7830                                 break;
7831                         case 1: /* D1 vline */
7832                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7833                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7834                                         DRM_DEBUG("IH: D1 vline\n");
7835                                 }
7836                                 break;
7837                         default:
7838                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7839                                 break;
7840                         }
7841                         break;
7842                 case 2: /* D2 vblank/vline */
7843                         switch (src_data) {
7844                         case 0: /* D2 vblank */
7845                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7846                                         if (rdev->irq.crtc_vblank_int[1]) {
7847                                                 drm_handle_vblank(rdev->ddev, 1);
7848                                                 rdev->pm.vblank_sync = true;
7849                                                 wake_up(&rdev->irq.vblank_queue);
7850                                         }
7851                                         if (atomic_read(&rdev->irq.pflip[1]))
7852                                                 radeon_crtc_handle_vblank(rdev, 1);
7853                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7854                                         DRM_DEBUG("IH: D2 vblank\n");
7855                                 }
7856                                 break;
7857                         case 1: /* D2 vline */
7858                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7859                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7860                                         DRM_DEBUG("IH: D2 vline\n");
7861                                 }
7862                                 break;
7863                         default:
7864                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7865                                 break;
7866                         }
7867                         break;
7868                 case 3: /* D3 vblank/vline */
7869                         switch (src_data) {
7870                         case 0: /* D3 vblank */
7871                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7872                                         if (rdev->irq.crtc_vblank_int[2]) {
7873                                                 drm_handle_vblank(rdev->ddev, 2);
7874                                                 rdev->pm.vblank_sync = true;
7875                                                 wake_up(&rdev->irq.vblank_queue);
7876                                         }
7877                                         if (atomic_read(&rdev->irq.pflip[2]))
7878                                                 radeon_crtc_handle_vblank(rdev, 2);
7879                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7880                                         DRM_DEBUG("IH: D3 vblank\n");
7881                                 }
7882                                 break;
7883                         case 1: /* D3 vline */
7884                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7885                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7886                                         DRM_DEBUG("IH: D3 vline\n");
7887                                 }
7888                                 break;
7889                         default:
7890                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7891                                 break;
7892                         }
7893                         break;
7894                 case 4: /* D4 vblank/vline */
7895                         switch (src_data) {
7896                         case 0: /* D4 vblank */
7897                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7898                                         if (rdev->irq.crtc_vblank_int[3]) {
7899                                                 drm_handle_vblank(rdev->ddev, 3);
7900                                                 rdev->pm.vblank_sync = true;
7901                                                 wake_up(&rdev->irq.vblank_queue);
7902                                         }
7903                                         if (atomic_read(&rdev->irq.pflip[3]))
7904                                                 radeon_crtc_handle_vblank(rdev, 3);
7905                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7906                                         DRM_DEBUG("IH: D4 vblank\n");
7907                                 }
7908                                 break;
7909                         case 1: /* D4 vline */
7910                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7911                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7912                                         DRM_DEBUG("IH: D4 vline\n");
7913                                 }
7914                                 break;
7915                         default:
7916                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7917                                 break;
7918                         }
7919                         break;
7920                 case 5: /* D5 vblank/vline */
7921                         switch (src_data) {
7922                         case 0: /* D5 vblank */
7923                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7924                                         if (rdev->irq.crtc_vblank_int[4]) {
7925                                                 drm_handle_vblank(rdev->ddev, 4);
7926                                                 rdev->pm.vblank_sync = true;
7927                                                 wake_up(&rdev->irq.vblank_queue);
7928                                         }
7929                                         if (atomic_read(&rdev->irq.pflip[4]))
7930                                                 radeon_crtc_handle_vblank(rdev, 4);
7931                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7932                                         DRM_DEBUG("IH: D5 vblank\n");
7933                                 }
7934                                 break;
7935                         case 1: /* D5 vline */
7936                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7937                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7938                                         DRM_DEBUG("IH: D5 vline\n");
7939                                 }
7940                                 break;
7941                         default:
7942                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7943                                 break;
7944                         }
7945                         break;
7946                 case 6: /* D6 vblank/vline */
7947                         switch (src_data) {
7948                         case 0: /* D6 vblank */
7949                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7950                                         if (rdev->irq.crtc_vblank_int[5]) {
7951                                                 drm_handle_vblank(rdev->ddev, 5);
7952                                                 rdev->pm.vblank_sync = true;
7953                                                 wake_up(&rdev->irq.vblank_queue);
7954                                         }
7955                                         if (atomic_read(&rdev->irq.pflip[5]))
7956                                                 radeon_crtc_handle_vblank(rdev, 5);
7957                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7958                                         DRM_DEBUG("IH: D6 vblank\n");
7959                                 }
7960                                 break;
7961                         case 1: /* D6 vline */
7962                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7963                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7964                                         DRM_DEBUG("IH: D6 vline\n");
7965                                 }
7966                                 break;
7967                         default:
7968                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7969                                 break;
7970                         }
7971                         break;
7972                 case 8: /* D1 page flip */
7973                 case 10: /* D2 page flip */
7974                 case 12: /* D3 page flip */
7975                 case 14: /* D4 page flip */
7976                 case 16: /* D5 page flip */
7977                 case 18: /* D6 page flip */
7978                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7979                         if (radeon_use_pflipirq > 0)
7980                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7981                         break;
7982                 case 42: /* HPD hotplug */
7983                         switch (src_data) {
7984                         case 0:
7985                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7986                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7987                                         queue_hotplug = true;
7988                                         DRM_DEBUG("IH: HPD1\n");
7989                                 }
7990                                 break;
7991                         case 1:
7992                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7993                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7994                                         queue_hotplug = true;
7995                                         DRM_DEBUG("IH: HPD2\n");
7996                                 }
7997                                 break;
7998                         case 2:
7999                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8000                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8001                                         queue_hotplug = true;
8002                                         DRM_DEBUG("IH: HPD3\n");
8003                                 }
8004                                 break;
8005                         case 3:
8006                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8007                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8008                                         queue_hotplug = true;
8009                                         DRM_DEBUG("IH: HPD4\n");
8010                                 }
8011                                 break;
8012                         case 4:
8013                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8014                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8015                                         queue_hotplug = true;
8016                                         DRM_DEBUG("IH: HPD5\n");
8017                                 }
8018                                 break;
8019                         case 5:
8020                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8021                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8022                                         queue_hotplug = true;
8023                                         DRM_DEBUG("IH: HPD6\n");
8024                                 }
8025                                 break;
8026                         default:
8027                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8028                                 break;
8029                         }
8030                         break;
8031                 case 124: /* UVD */
8032                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8033                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8034                         break;
8035                 case 146:
8036                 case 147:
8037                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8038                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8039                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8040                         /* reset addr and status */
8041                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8042                         if (addr == 0x0 && status == 0x0)
8043                                 break;
8044                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8045                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8046                                 addr);
8047                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8048                                 status);
8049                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8050                         break;
8051                 case 167: /* VCE */
8052                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8053                         switch (src_data) {
8054                         case 0:
8055                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8056                                 break;
8057                         case 1:
8058                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8059                                 break;
8060                         default:
8061                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8062                                 break;
8063                         }
8064                         break;
8065                 case 176: /* GFX RB CP_INT */
8066                 case 177: /* GFX IB CP_INT */
8067                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8068                         break;
8069                 case 181: /* CP EOP event */
8070                         DRM_DEBUG("IH: CP EOP\n");
8071                         /* XXX check the bitfield order! */
8072                         me_id = (ring_id & 0x60) >> 5;
8073                         pipe_id = (ring_id & 0x18) >> 3;
8074                         queue_id = (ring_id & 0x7) >> 0;
8075                         switch (me_id) {
8076                         case 0:
8077                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8078                                 break;
8079                         case 1:
8080                         case 2:
8081                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8082                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8083                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8084                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8085                                 break;
8086                         }
8087                         break;
8088                 case 184: /* CP Privileged reg access */
8089                         DRM_ERROR("Illegal register access in command stream\n");
8090                         /* XXX check the bitfield order! */
8091                         me_id = (ring_id & 0x60) >> 5;
8092                         pipe_id = (ring_id & 0x18) >> 3;
8093                         queue_id = (ring_id & 0x7) >> 0;
8094                         switch (me_id) {
8095                         case 0:
8096                                 /* This results in a full GPU reset, but all we need to do is soft
8097                                  * reset the CP for gfx
8098                                  */
8099                                 queue_reset = true;
8100                                 break;
8101                         case 1:
8102                                 /* XXX compute */
8103                                 queue_reset = true;
8104                                 break;
8105                         case 2:
8106                                 /* XXX compute */
8107                                 queue_reset = true;
8108                                 break;
8109                         }
8110                         break;
8111                 case 185: /* CP Privileged inst */
8112                         DRM_ERROR("Illegal instruction in command stream\n");
8113                         /* XXX check the bitfield order! */
8114                         me_id = (ring_id & 0x60) >> 5;
8115                         pipe_id = (ring_id & 0x18) >> 3;
8116                         queue_id = (ring_id & 0x7) >> 0;
8117                         switch (me_id) {
8118                         case 0:
8119                                 /* This results in a full GPU reset, but all we need to do is soft
8120                                  * reset the CP for gfx
8121                                  */
8122                                 queue_reset = true;
8123                                 break;
8124                         case 1:
8125                                 /* XXX compute */
8126                                 queue_reset = true;
8127                                 break;
8128                         case 2:
8129                                 /* XXX compute */
8130                                 queue_reset = true;
8131                                 break;
8132                         }
8133                         break;
8134                 case 224: /* SDMA trap event */
8135                         /* XXX check the bitfield order! */
8136                         me_id = (ring_id & 0x3) >> 0;
8137                         queue_id = (ring_id & 0xc) >> 2;
8138                         DRM_DEBUG("IH: SDMA trap\n");
8139                         switch (me_id) {
8140                         case 0:
8141                                 switch (queue_id) {
8142                                 case 0:
8143                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8144                                         break;
8145                                 case 1:
8146                                         /* XXX compute */
8147                                         break;
8148                                 case 2:
8149                                         /* XXX compute */
8150                                         break;
8151                                 }
8152                                 break;
8153                         case 1:
8154                                 switch (queue_id) {
8155                                 case 0:
8156                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8157                                         break;
8158                                 case 1:
8159                                         /* XXX compute */
8160                                         break;
8161                                 case 2:
8162                                         /* XXX compute */
8163                                         break;
8164                                 }
8165                                 break;
8166                         }
8167                         break;
8168                 case 230: /* thermal low to high */
8169                         DRM_DEBUG("IH: thermal low to high\n");
8170                         rdev->pm.dpm.thermal.high_to_low = false;
8171                         queue_thermal = true;
8172                         break;
8173                 case 231: /* thermal high to low */
8174                         DRM_DEBUG("IH: thermal high to low\n");
8175                         rdev->pm.dpm.thermal.high_to_low = true;
8176                         queue_thermal = true;
8177                         break;
8178                 case 233: /* GUI IDLE */
8179                         DRM_DEBUG("IH: GUI idle\n");
8180                         break;
8181                 case 241: /* SDMA Privileged inst */
8182                 case 247: /* SDMA Privileged inst */
8183                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8184                         /* XXX check the bitfield order! */
8185                         me_id = (ring_id & 0x3) >> 0;
8186                         queue_id = (ring_id & 0xc) >> 2;
8187                         switch (me_id) {
8188                         case 0:
8189                                 switch (queue_id) {
8190                                 case 0:
8191                                         queue_reset = true;
8192                                         break;
8193                                 case 1:
8194                                         /* XXX compute */
8195                                         queue_reset = true;
8196                                         break;
8197                                 case 2:
8198                                         /* XXX compute */
8199                                         queue_reset = true;
8200                                         break;
8201                                 }
8202                                 break;
8203                         case 1:
8204                                 switch (queue_id) {
8205                                 case 0:
8206                                         queue_reset = true;
8207                                         break;
8208                                 case 1:
8209                                         /* XXX compute */
8210                                         queue_reset = true;
8211                                         break;
8212                                 case 2:
8213                                         /* XXX compute */
8214                                         queue_reset = true;
8215                                         break;
8216                                 }
8217                                 break;
8218                         }
8219                         break;
8220                 default:
8221                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8222                         break;
8223                 }
8224
8225                 /* wptr/rptr are in bytes! */
8226                 rptr += 16;
8227                 rptr &= rdev->ih.ptr_mask;
8228                 WREG32(IH_RB_RPTR, rptr);
8229         }
8230         if (queue_hotplug)
8231                 schedule_work(&rdev->hotplug_work);
8232         if (queue_reset) {
8233                 rdev->needs_reset = true;
8234                 wake_up_all(&rdev->fence_queue);
8235         }
8236         if (queue_thermal)
8237                 schedule_work(&rdev->pm.dpm.thermal.work);
8238         rdev->ih.rptr = rptr;
8239         atomic_set(&rdev->ih.lock, 0);
8240
8241         /* make sure wptr hasn't changed while processing */
8242         wptr = cik_get_ih_wptr(rdev);
8243         if (wptr != rptr)
8244                 goto restart_ih;
8245
8246         return IRQ_HANDLED;
8247 }
8248
8249 /*
8250  * startup/shutdown callbacks
8251  */
8252 /**
8253  * cik_startup - program the asic to a functional state
8254  *
8255  * @rdev: radeon_device pointer
8256  *
8257  * Programs the asic to a functional state (CIK).
8258  * Called by cik_init() and cik_resume().
8259  * Returns 0 for success, error for failure.
8260  */
8261 static int cik_startup(struct radeon_device *rdev)
8262 {
8263         struct radeon_ring *ring;
8264         u32 nop;
8265         int r;
8266
8267         /* enable pcie gen2/3 link */
8268         cik_pcie_gen3_enable(rdev);
8269         /* enable aspm */
8270         cik_program_aspm(rdev);
8271
8272         /* scratch needs to be initialized before MC */
8273         r = r600_vram_scratch_init(rdev);
8274         if (r)
8275                 return r;
8276
8277         cik_mc_program(rdev);
8278
8279         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8280                 r = ci_mc_load_microcode(rdev);
8281                 if (r) {
8282                         DRM_ERROR("Failed to load MC firmware!\n");
8283                         return r;
8284                 }
8285         }
8286
8287         r = cik_pcie_gart_enable(rdev);
8288         if (r)
8289                 return r;
8290         cik_gpu_init(rdev);
8291
8292         /* allocate rlc buffers */
8293         if (rdev->flags & RADEON_IS_IGP) {
8294                 if (rdev->family == CHIP_KAVERI) {
8295                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8296                         rdev->rlc.reg_list_size =
8297                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8298                 } else {
8299                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8300                         rdev->rlc.reg_list_size =
8301                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8302                 }
8303         }
8304         rdev->rlc.cs_data = ci_cs_data;
8305         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8306         r = sumo_rlc_init(rdev);
8307         if (r) {
8308                 DRM_ERROR("Failed to init rlc BOs!\n");
8309                 return r;
8310         }
8311
8312         /* allocate wb buffer */
8313         r = radeon_wb_init(rdev);
8314         if (r)
8315                 return r;
8316
8317         /* allocate mec buffers */
8318         r = cik_mec_init(rdev);
8319         if (r) {
8320                 DRM_ERROR("Failed to init MEC BOs!\n");
8321                 return r;
8322         }
8323
8324         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8325         if (r) {
8326                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8327                 return r;
8328         }
8329
8330         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8331         if (r) {
8332                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8333                 return r;
8334         }
8335
8336         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8337         if (r) {
8338                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8339                 return r;
8340         }
8341
8342         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8343         if (r) {
8344                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8345                 return r;
8346         }
8347
8348         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8349         if (r) {
8350                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8351                 return r;
8352         }
8353
8354         r = radeon_uvd_resume(rdev);
8355         if (!r) {
8356                 r = uvd_v4_2_resume(rdev);
8357                 if (!r) {
8358                         r = radeon_fence_driver_start_ring(rdev,
8359                                                            R600_RING_TYPE_UVD_INDEX);
8360                         if (r)
8361                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8362                 }
8363         }
8364         if (r)
8365                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8366
8367         r = radeon_vce_resume(rdev);
8368         if (!r) {
8369                 r = vce_v2_0_resume(rdev);
8370                 if (!r)
8371                         r = radeon_fence_driver_start_ring(rdev,
8372                                                            TN_RING_TYPE_VCE1_INDEX);
8373                 if (!r)
8374                         r = radeon_fence_driver_start_ring(rdev,
8375                                                            TN_RING_TYPE_VCE2_INDEX);
8376         }
8377         if (r) {
8378                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8379                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8380                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8381         }
8382
8383         /* Enable IRQ */
8384         if (!rdev->irq.installed) {
8385                 r = radeon_irq_kms_init(rdev);
8386                 if (r)
8387                         return r;
8388         }
8389
8390         r = cik_irq_init(rdev);
8391         if (r) {
8392                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8393                 radeon_irq_kms_fini(rdev);
8394                 return r;
8395         }
8396         cik_irq_set(rdev);
8397
8398         if (rdev->family == CHIP_HAWAII) {
8399                 if (rdev->new_fw)
8400                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8401                 else
8402                         nop = RADEON_CP_PACKET2;
8403         } else {
8404                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8405         }
8406
8407         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8408         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8409                              nop);
8410         if (r)
8411                 return r;
8412
8413         /* set up the compute queues */
8414         /* type-2 packets are deprecated on MEC, use type-3 instead */
8415         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8416         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8417                              nop);
8418         if (r)
8419                 return r;
8420         ring->me = 1; /* first MEC */
8421         ring->pipe = 0; /* first pipe */
8422         ring->queue = 0; /* first queue */
8423         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8424
8425         /* type-2 packets are deprecated on MEC, use type-3 instead */
8426         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8427         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8428                              nop);
8429         if (r)
8430                 return r;
8431         /* dGPUs only have 1 MEC */
8432         ring->me = 1; /* first MEC */
8433         ring->pipe = 0; /* first pipe */
8434         ring->queue = 1; /* second queue */
8435         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8436
8437         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8438         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8439                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8440         if (r)
8441                 return r;
8442
8443         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8444         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8445                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8446         if (r)
8447                 return r;
8448
8449         r = cik_cp_resume(rdev);
8450         if (r)
8451                 return r;
8452
8453         r = cik_sdma_resume(rdev);
8454         if (r)
8455                 return r;
8456
8457         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8458         if (ring->ring_size) {
8459                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8460                                      RADEON_CP_PACKET2);
8461                 if (!r)
8462                         r = uvd_v1_0_init(rdev);
8463                 if (r)
8464                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8465         }
8466
8467         r = -ENOENT;
8468
8469         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8470         if (ring->ring_size)
8471                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8472                                      VCE_CMD_NO_OP);
8473
8474         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8475         if (ring->ring_size)
8476                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8477                                      VCE_CMD_NO_OP);
8478
8479         if (!r)
8480                 r = vce_v1_0_init(rdev);
8481         else if (r != -ENOENT)
8482                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8483
8484         r = radeon_ib_pool_init(rdev);
8485         if (r) {
8486                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8487                 return r;
8488         }
8489
8490         r = radeon_vm_manager_init(rdev);
8491         if (r) {
8492                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8493                 return r;
8494         }
8495
8496         r = dce6_audio_init(rdev);
8497         if (r)
8498                 return r;
8499
8500         r = radeon_kfd_resume(rdev);
8501         if (r)
8502                 return r;
8503
8504         return 0;
8505 }
8506
8507 /**
8508  * cik_resume - resume the asic to a functional state
8509  *
8510  * @rdev: radeon_device pointer
8511  *
8512  * Programs the asic to a functional state (CIK).
8513  * Called at resume.
8514  * Returns 0 for success, error for failure.
8515  */
8516 int cik_resume(struct radeon_device *rdev)
8517 {
8518         int r;
8519
8520         /* post card */
8521         atom_asic_init(rdev->mode_info.atom_context);
8522
8523         /* init golden registers */
8524         cik_init_golden_registers(rdev);
8525
8526         if (rdev->pm.pm_method == PM_METHOD_DPM)
8527                 radeon_pm_resume(rdev);
8528
8529         rdev->accel_working = true;
8530         r = cik_startup(rdev);
8531         if (r) {
8532                 DRM_ERROR("cik startup failed on resume\n");
8533                 rdev->accel_working = false;
8534                 return r;
8535         }
8536
8537         return r;
8538
8539 }
8540
8541 /**
8542  * cik_suspend - suspend the asic
8543  *
8544  * @rdev: radeon_device pointer
8545  *
8546  * Bring the chip into a state suitable for suspend (CIK).
8547  * Called at suspend.
8548  * Returns 0 for success.
8549  */
8550 int cik_suspend(struct radeon_device *rdev)
8551 {
8552         radeon_kfd_suspend(rdev);
8553         radeon_pm_suspend(rdev);
8554         dce6_audio_fini(rdev);
8555         radeon_vm_manager_fini(rdev);
8556         cik_cp_enable(rdev, false);
8557         cik_sdma_enable(rdev, false);
8558         uvd_v1_0_fini(rdev);
8559         radeon_uvd_suspend(rdev);
8560         radeon_vce_suspend(rdev);
8561         cik_fini_pg(rdev);
8562         cik_fini_cg(rdev);
8563         cik_irq_suspend(rdev);
8564         radeon_wb_disable(rdev);
8565         cik_pcie_gart_disable(rdev);
8566         return 0;
8567 }
8568
8569 /* The plan is to move initialization into that function and use
8570  * helper functions so that radeon_device_init does pretty much
8571  * nothing more than call asic specific functions. This
8572  * should also allow us to remove a bunch of callback functions
8573  * like vram_info.
8574  */
8575 /**
8576  * cik_init - asic specific driver and hw init
8577  *
8578  * @rdev: radeon_device pointer
8579  *
8580  * Setup asic specific driver variables and program the hw
8581  * to a functional state (CIK).
8582  * Called at driver startup.
8583  * Returns 0 for success, errors for failure.
8584  */
8585 int cik_init(struct radeon_device *rdev)
8586 {
8587         struct radeon_ring *ring;
8588         int r;
8589
8590         /* Read BIOS */
8591         if (!radeon_get_bios(rdev)) {
8592                 if (ASIC_IS_AVIVO(rdev))
8593                         return -EINVAL;
8594         }
8595         /* Must be an ATOMBIOS */
8596         if (!rdev->is_atom_bios) {
8597                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8598                 return -EINVAL;
8599         }
8600         r = radeon_atombios_init(rdev);
8601         if (r)
8602                 return r;
8603
8604         /* Post card if necessary */
8605         if (!radeon_card_posted(rdev)) {
8606                 if (!rdev->bios) {
8607                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8608                         return -EINVAL;
8609                 }
8610                 DRM_INFO("GPU not posted. posting now...\n");
8611                 atom_asic_init(rdev->mode_info.atom_context);
8612         }
8613         /* init golden registers */
8614         cik_init_golden_registers(rdev);
8615         /* Initialize scratch registers */
8616         cik_scratch_init(rdev);
8617         /* Initialize surface registers */
8618         radeon_surface_init(rdev);
8619         /* Initialize clocks */
8620         radeon_get_clock_info(rdev->ddev);
8621
8622         /* Fence driver */
8623         r = radeon_fence_driver_init(rdev);
8624         if (r)
8625                 return r;
8626
8627         /* initialize memory controller */
8628         r = cik_mc_init(rdev);
8629         if (r)
8630                 return r;
8631         /* Memory manager */
8632         r = radeon_bo_init(rdev);
8633         if (r)
8634                 return r;
8635
8636         if (rdev->flags & RADEON_IS_IGP) {
8637                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8638                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8639                         r = cik_init_microcode(rdev);
8640                         if (r) {
8641                                 DRM_ERROR("Failed to load firmware!\n");
8642                                 return r;
8643                         }
8644                 }
8645         } else {
8646                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8647                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8648                     !rdev->mc_fw) {
8649                         r = cik_init_microcode(rdev);
8650                         if (r) {
8651                                 DRM_ERROR("Failed to load firmware!\n");
8652                                 return r;
8653                         }
8654                 }
8655         }
8656
8657         /* Initialize power management */
8658         radeon_pm_init(rdev);
8659
8660         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8661         ring->ring_obj = NULL;
8662         r600_ring_init(rdev, ring, 1024 * 1024);
8663
8664         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8665         ring->ring_obj = NULL;
8666         r600_ring_init(rdev, ring, 1024 * 1024);
8667         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8668         if (r)
8669                 return r;
8670
8671         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8672         ring->ring_obj = NULL;
8673         r600_ring_init(rdev, ring, 1024 * 1024);
8674         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8675         if (r)
8676                 return r;
8677
8678         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8679         ring->ring_obj = NULL;
8680         r600_ring_init(rdev, ring, 256 * 1024);
8681
8682         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8683         ring->ring_obj = NULL;
8684         r600_ring_init(rdev, ring, 256 * 1024);
8685
8686         r = radeon_uvd_init(rdev);
8687         if (!r) {
8688                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8689                 ring->ring_obj = NULL;
8690                 r600_ring_init(rdev, ring, 4096);
8691         }
8692
8693         r = radeon_vce_init(rdev);
8694         if (!r) {
8695                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8696                 ring->ring_obj = NULL;
8697                 r600_ring_init(rdev, ring, 4096);
8698
8699                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8700                 ring->ring_obj = NULL;
8701                 r600_ring_init(rdev, ring, 4096);
8702         }
8703
8704         rdev->ih.ring_obj = NULL;
8705         r600_ih_ring_init(rdev, 64 * 1024);
8706
8707         r = r600_pcie_gart_init(rdev);
8708         if (r)
8709                 return r;
8710
8711         rdev->accel_working = true;
8712         r = cik_startup(rdev);
8713         if (r) {
8714                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8715                 cik_cp_fini(rdev);
8716                 cik_sdma_fini(rdev);
8717                 cik_irq_fini(rdev);
8718                 sumo_rlc_fini(rdev);
8719                 cik_mec_fini(rdev);
8720                 radeon_wb_fini(rdev);
8721                 radeon_ib_pool_fini(rdev);
8722                 radeon_vm_manager_fini(rdev);
8723                 radeon_irq_kms_fini(rdev);
8724                 cik_pcie_gart_fini(rdev);
8725                 rdev->accel_working = false;
8726         }
8727
8728         /* Don't start up if the MC ucode is missing.
8729          * The default clocks and voltages before the MC ucode
8730          * is loaded are not sufficient for advanced operations.
8731          */
8732         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8733                 DRM_ERROR("radeon: MC ucode required for CIK dGPUs.\n");
8734                 return -EINVAL;
8735         }
8736
8737         return 0;
8738 }
8739
8740 /**
8741  * cik_fini - asic specific driver and hw fini
8742  *
8743  * @rdev: radeon_device pointer
8744  *
8745  * Tear down the asic specific driver variables and program the hw
8746  * to an idle state (CIK).
8747  * Called at driver unload.
8748  */
8749 void cik_fini(struct radeon_device *rdev)
8750 {
8751         radeon_pm_fini(rdev);
8752         cik_cp_fini(rdev);
8753         cik_sdma_fini(rdev);
8754         cik_fini_pg(rdev);
8755         cik_fini_cg(rdev);
8756         cik_irq_fini(rdev);
8757         sumo_rlc_fini(rdev);
8758         cik_mec_fini(rdev);
8759         radeon_wb_fini(rdev);
8760         radeon_vm_manager_fini(rdev);
8761         radeon_ib_pool_fini(rdev);
8762         radeon_irq_kms_fini(rdev);
8763         uvd_v1_0_fini(rdev);
8764         radeon_uvd_fini(rdev);
8765         radeon_vce_fini(rdev);
8766         cik_pcie_gart_fini(rdev);
8767         r600_vram_scratch_fini(rdev);
8768         radeon_gem_fini(rdev);
8769         radeon_fence_driver_fini(rdev);
8770         radeon_bo_fini(rdev);
8771         radeon_atombios_fini(rdev);
8772         kfree(rdev->bios);
8773         rdev->bios = NULL;
8774 }
8775
8776 void dce8_program_fmt(struct drm_encoder *encoder)
8777 {
8778         struct drm_device *dev = encoder->dev;
8779         struct radeon_device *rdev = dev->dev_private;
8780         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8781         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8782         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8783         int bpc = 0;
8784         u32 tmp = 0;
8785         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8786
8787         if (connector) {
8788                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8789                 bpc = radeon_get_monitor_bpc(connector);
8790                 dither = radeon_connector->dither;
8791         }
8792
8793         /* LVDS/eDP FMT is set up by atom */
8794         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8795                 return;
8796
8797         /* not needed for analog */
8798         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8799             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8800                 return;
8801
8802         if (bpc == 0)
8803                 return;
8804
8805         switch (bpc) {
8806         case 6:
8807                 if (dither == RADEON_FMT_DITHER_ENABLE)
8808                         /* XXX sort out optimal dither settings */
8809                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8810                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8811                 else
8812                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8813                 break;
8814         case 8:
8815                 if (dither == RADEON_FMT_DITHER_ENABLE)
8816                         /* XXX sort out optimal dither settings */
8817                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818                                 FMT_RGB_RANDOM_ENABLE |
8819                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8820                 else
8821                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8822                 break;
8823         case 10:
8824                 if (dither == RADEON_FMT_DITHER_ENABLE)
8825                         /* XXX sort out optimal dither settings */
8826                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8827                                 FMT_RGB_RANDOM_ENABLE |
8828                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8829                 else
8830                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8831                 break;
8832         default:
8833                 /* not needed */
8834                 break;
8835         }
8836
8837         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8838 }
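/*
 * Rough worked example for the switch above (illustrative only): an
 * 8 bpc panel with dithering enabled gets
 *   FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
 *   FMT_RGB_RANDOM_ENABLE | FMT_SPATIAL_DITHER_EN |
 *   FMT_SPATIAL_DITHER_DEPTH(1)
 * while the same panel without dithering is simply truncated with
 *   FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1)
 * The depth argument selects the target depth (0 for 6 bpc, 1 for
 * 8 bpc, 2 for 10 bpc), matching the three cases handled above.
 */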
8839
8840 /* display watermark setup */
8841 /**
8842  * dce8_line_buffer_adjust - Set up the line buffer
8843  *
8844  * @rdev: radeon_device pointer
8845  * @radeon_crtc: the selected display controller
8846  * @mode: the current display mode on the selected display
8847  * controller
8848  *
8849  * Set up the line buffer allocation for
8850  * the selected display controller (CIK).
8851  * Returns the line buffer size in pixels.
8852  */
8853 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8854                                    struct radeon_crtc *radeon_crtc,
8855                                    struct drm_display_mode *mode)
8856 {
8857         u32 tmp, buffer_alloc, i;
8858         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8859         /*
8860          * Line Buffer Setup
8861          * There are 6 line buffers, one for each display controller.
8862          * There are 3 partitions per LB. Select the number of partitions
8863          * to enable based on the display width.  For display widths larger
8864          * than 4096, you need to use 2 display controllers and combine
8865          * them using the stereo blender.
8866          */
8867         if (radeon_crtc->base.enabled && mode) {
8868                 if (mode->crtc_hdisplay < 1920) {
8869                         tmp = 1;
8870                         buffer_alloc = 2;
8871                 } else if (mode->crtc_hdisplay < 2560) {
8872                         tmp = 2;
8873                         buffer_alloc = 2;
8874                 } else if (mode->crtc_hdisplay < 4096) {
8875                         tmp = 0;
8876                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8877                 } else {
8878                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8879                         tmp = 0;
8880                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8881                 }
8882         } else {
8883                 tmp = 1;
8884                 buffer_alloc = 0;
8885         }
8886
8887         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8888                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8889
8890         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8891                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8892         for (i = 0; i < rdev->usec_timeout; i++) {
8893                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8894                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8895                         break;
8896                 udelay(1);
8897         }
8898
8899         if (radeon_crtc->base.enabled && mode) {
8900                 switch (tmp) {
8901                 case 0:
8902                 default:
8903                         return 4096 * 2;
8904                 case 1:
8905                         return 1920 * 2;
8906                 case 2:
8907                         return 2560 * 2;
8908                 }
8909         }
8910
8911         /* controller not enabled, so no lb used */
8912         return 0;
8913 }
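/*
 * Worked example (illustrative): a 1920x1080 mode has
 * crtc_hdisplay == 1920, which falls into the "< 2560" bucket, so
 * tmp = 2 and buffer_alloc = 2, and the function reports a line
 * buffer size of 2560 * 2 = 5120 pixels to the watermark code.
 */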
8914
8915 /**
8916  * cik_get_number_of_dram_channels - get the number of dram channels
8917  *
8918  * @rdev: radeon_device pointer
8919  *
8920  * Look up the number of video ram channels (CIK).
8921  * Used for display watermark bandwidth calculations
8922  * Returns the number of dram channels
8923  */
8924 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8925 {
8926         u32 tmp = RREG32(MC_SHARED_CHMAP);
8927
8928         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8929         case 0:
8930         default:
8931                 return 1;
8932         case 1:
8933                 return 2;
8934         case 2:
8935                 return 4;
8936         case 3:
8937                 return 8;
8938         case 4:
8939                 return 3;
8940         case 5:
8941                 return 6;
8942         case 6:
8943                 return 10;
8944         case 7:
8945                 return 12;
8946         case 8:
8947                 return 16;
8948         }
8949 }
8950
8951 struct dce8_wm_params {
8952         u32 dram_channels; /* number of dram channels */
8953         u32 yclk;          /* bandwidth per dram data pin in kHz */
8954         u32 sclk;          /* engine clock in kHz */
8955         u32 disp_clk;      /* display clock in kHz */
8956         u32 src_width;     /* viewport width */
8957         u32 active_time;   /* active display time in ns */
8958         u32 blank_time;    /* blank time in ns */
8959         bool interlaced;    /* mode is interlaced */
8960         fixed20_12 vsc;    /* vertical scale ratio */
8961         u32 num_heads;     /* number of active crtcs */
8962         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8963         u32 lb_size;       /* line buffer allocated to pipe */
8964         u32 vtaps;         /* vertical scaler taps */
8965 };
8966
8967 /**
8968  * dce8_dram_bandwidth - get the dram bandwidth
8969  *
8970  * @wm: watermark calculation data
8971  *
8972  * Calculate the raw dram bandwidth (CIK).
8973  * Used for display watermark bandwidth calculations
8974  * Returns the dram bandwidth in MBytes/s
8975  */
8976 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8977 {
8978         /* Calculate raw DRAM Bandwidth */
8979         fixed20_12 dram_efficiency; /* 0.7 */
8980         fixed20_12 yclk, dram_channels, bandwidth;
8981         fixed20_12 a;
8982
8983         a.full = dfixed_const(1000);
8984         yclk.full = dfixed_const(wm->yclk);
8985         yclk.full = dfixed_div(yclk, a);
8986         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8987         a.full = dfixed_const(10);
8988         dram_efficiency.full = dfixed_const(7);
8989         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8990         bandwidth.full = dfixed_mul(dram_channels, yclk);
8991         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8992
8993         return dfixed_trunc(bandwidth);
8994 }
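/*
 * Worked example with illustrative numbers (not taken from any
 * particular board): yclk = 1000000 kHz per pin and 4 DRAM channels
 * give
 *   (1000000 / 1000) * (4 * 4) * 0.7 = 11200 MBytes/s
 * of raw DRAM bandwidth.
 */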
8995
8996 /**
8997  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8998  *
8999  * @wm: watermark calculation data
9000  *
9001  * Calculate the dram bandwidth used for display (CIK).
9002  * Used for display watermark bandwidth calculations
9003  * Returns the dram bandwidth for display in MBytes/s
9004  */
9005 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9006 {
9007         /* Calculate DRAM Bandwidth and the part allocated to display. */
9008         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9009         fixed20_12 yclk, dram_channels, bandwidth;
9010         fixed20_12 a;
9011
9012         a.full = dfixed_const(1000);
9013         yclk.full = dfixed_const(wm->yclk);
9014         yclk.full = dfixed_div(yclk, a);
9015         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9016         a.full = dfixed_const(10);
9017         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9018         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9019         bandwidth.full = dfixed_mul(dram_channels, yclk);
9020         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9021
9022         return dfixed_trunc(bandwidth);
9023 }
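/*
 * With the same illustrative inputs (yclk = 1000000 kHz, 4 channels)
 * the display share is
 *   1000 * 16 * 0.3 = 4800 MBytes/s
 * since the worst-case 0.3 allocation is assumed here.
 */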
9024
9025 /**
9026  * dce8_data_return_bandwidth - get the data return bandwidth
9027  *
9028  * @wm: watermark calculation data
9029  *
9030  * Calculate the data return bandwidth used for display (CIK).
9031  * Used for display watermark bandwidth calculations
9032  * Returns the data return bandwidth in MBytes/s
9033  */
9034 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9035 {
9036         /* Calculate the display Data return Bandwidth */
9037         fixed20_12 return_efficiency; /* 0.8 */
9038         fixed20_12 sclk, bandwidth;
9039         fixed20_12 a;
9040
9041         a.full = dfixed_const(1000);
9042         sclk.full = dfixed_const(wm->sclk);
9043         sclk.full = dfixed_div(sclk, a);
9044         a.full = dfixed_const(10);
9045         return_efficiency.full = dfixed_const(8);
9046         return_efficiency.full = dfixed_div(return_efficiency, a);
9047         a.full = dfixed_const(32);
9048         bandwidth.full = dfixed_mul(a, sclk);
9049         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9050
9051         return dfixed_trunc(bandwidth);
9052 }
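/*
 * Worked example (illustrative): sclk = 800000 kHz gives
 *   32 * (800000 / 1000) * 0.8 = 20480 MBytes/s
 * of display data return bandwidth.
 */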
9053
9054 /**
9055  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9056  *
9057  * @wm: watermark calculation data
9058  *
9059  * Calculate the dmif bandwidth used for display (CIK).
9060  * Used for display watermark bandwidth calculations
9061  * Returns the dmif bandwidth in MBytes/s
9062  */
9063 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9064 {
9065         /* Calculate the DMIF Request Bandwidth */
9066         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9067         fixed20_12 disp_clk, bandwidth;
9068         fixed20_12 a, b;
9069
9070         a.full = dfixed_const(1000);
9071         disp_clk.full = dfixed_const(wm->disp_clk);
9072         disp_clk.full = dfixed_div(disp_clk, a);
9073         a.full = dfixed_const(32);
9074         b.full = dfixed_mul(a, disp_clk);
9075
9076         a.full = dfixed_const(10);
9077         disp_clk_request_efficiency.full = dfixed_const(8);
9078         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9079
9080         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9081
9082         return dfixed_trunc(bandwidth);
9083 }
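/*
 * Worked example (illustrative): disp_clk = 148500 kHz (a 1080p@60
 * pixel clock) gives
 *   32 * (148500 / 1000) * 0.8 = 3801.6, truncated to 3801 MBytes/s
 * of DMIF request bandwidth.
 */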
9084
9085 /**
9086  * dce8_available_bandwidth - get the min available bandwidth
9087  *
9088  * @wm: watermark calculation data
9089  *
9090  * Calculate the min available bandwidth used for display (CIK).
9091  * Used for display watermark bandwidth calculations
9092  * Returns the min available bandwidth in MBytes/s
9093  */
9094 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9095 {
9096         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9097         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9098         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9099         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9100
9101         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9102 }
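/*
 * Continuing the illustrative numbers above (11200, 20480 and
 * 3801 MBytes/s), the available bandwidth is the smallest of the
 * three, i.e. the 3801 MBytes/s DMIF request bandwidth.
 */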
9103
9104 /**
9105  * dce8_average_bandwidth - get the average available bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the average available bandwidth used for display (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the average available bandwidth in MBytes/s
9112  */
9113 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9114 {
9115         /* Calculate the display mode Average Bandwidth
9116          * DisplayMode should contain the source and destination dimensions,
9117          * timing, etc.
9118          */
9119         fixed20_12 bpp;
9120         fixed20_12 line_time;
9121         fixed20_12 src_width;
9122         fixed20_12 bandwidth;
9123         fixed20_12 a;
9124
9125         a.full = dfixed_const(1000);
9126         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9127         line_time.full = dfixed_div(line_time, a);
9128         bpp.full = dfixed_const(wm->bytes_per_pixel);
9129         src_width.full = dfixed_const(wm->src_width);
9130         bandwidth.full = dfixed_mul(src_width, bpp);
9131         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9132         bandwidth.full = dfixed_div(bandwidth, line_time);
9133
9134         return dfixed_trunc(bandwidth);
9135 }
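/*
 * Worked example (illustrative): for a 1080p@60 mode with a 13200 ns
 * line time (11520 ns active + 1680 ns blank), 4 bytes per pixel and
 * vsc = 1:
 *   (1920 * 4 * 1) / (13200 / 1000) = 7680 / 13.2, about 581 MBytes/s
 */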
9136
9137 /**
9138  * dce8_latency_watermark - get the latency watermark
9139  *
9140  * @wm: watermark calculation data
9141  *
9142  * Calculate the latency watermark (CIK).
9143  * Used for display watermark bandwidth calculations
9144  * Returns the latency watermark in ns
9145  */
9146 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9147 {
9148         /* First calculate the latency in ns */
9149         u32 mc_latency = 2000; /* 2000 ns. */
9150         u32 available_bandwidth = dce8_available_bandwidth(wm);
9151         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9152         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9153         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9154         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9155                 (wm->num_heads * cursor_line_pair_return_time);
9156         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9157         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9158         u32 tmp, dmif_size = 12288;
9159         fixed20_12 a, b, c;
9160
9161         if (wm->num_heads == 0)
9162                 return 0;
9163
9164         a.full = dfixed_const(2);
9165         b.full = dfixed_const(1);
9166         if ((wm->vsc.full > a.full) ||
9167             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9168             (wm->vtaps >= 5) ||
9169             ((wm->vsc.full >= a.full) && wm->interlaced))
9170                 max_src_lines_per_dst_line = 4;
9171         else
9172                 max_src_lines_per_dst_line = 2;
9173
9174         a.full = dfixed_const(available_bandwidth);
9175         b.full = dfixed_const(wm->num_heads);
9176         a.full = dfixed_div(a, b);
9177
9178         b.full = dfixed_const(mc_latency + 512);
9179         c.full = dfixed_const(wm->disp_clk);
9180         b.full = dfixed_div(b, c);
9181
9182         c.full = dfixed_const(dmif_size);
9183         b.full = dfixed_div(c, b);
9184
9185         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9186
9187         b.full = dfixed_const(1000);
9188         c.full = dfixed_const(wm->disp_clk);
9189         b.full = dfixed_div(c, b);
9190         c.full = dfixed_const(wm->bytes_per_pixel);
9191         b.full = dfixed_mul(b, c);
9192
9193         lb_fill_bw = min(tmp, dfixed_trunc(b));
9194
9195         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9196         b.full = dfixed_const(1000);
9197         c.full = dfixed_const(lb_fill_bw);
9198         b.full = dfixed_div(c, b);
9199         a.full = dfixed_div(a, b);
9200         line_fill_time = dfixed_trunc(a);
9201
9202         if (line_fill_time < wm->active_time)
9203                 return latency;
9204         else
9205                 return latency + (line_fill_time - wm->active_time);
9206
9207 }
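/*
 * With the illustrative single-head 1080p@60 numbers used above
 * (available bandwidth 3801 MBytes/s, disp_clk 148500 kHz) the raw
 * latency term works out to roughly
 *   mc_latency 2000 + chunk/cursor returns (2 * 1077 + 134 = 2288)
 *   + dc latency (40000000 / 148500 = 269) = 4557 ns
 * and is stretched further when the line buffer cannot be refilled
 * within the active portion of a scanline.
 */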
9208
9209 /**
9210  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9211  * average and available dram bandwidth
9212  *
9213  * @wm: watermark calculation data
9214  *
9215  * Check if the display average bandwidth fits in the display
9216  * dram bandwidth (CIK).
9217  * Used for display watermark bandwidth calculations
9218  * Returns true if the display fits, false if not.
9219  */
9220 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9221 {
9222         if (dce8_average_bandwidth(wm) <=
9223             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9224                 return true;
9225         else
9226                 return false;
9227 }
9228
9229 /**
9230  * dce8_average_bandwidth_vs_available_bandwidth - check
9231  * average and available bandwidth
9232  *
9233  * @wm: watermark calculation data
9234  *
9235  * Check if the display average bandwidth fits in the display
9236  * available bandwidth (CIK).
9237  * Used for display watermark bandwidth calculations
9238  * Returns true if the display fits, false if not.
9239  */
9240 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9241 {
9242         if (dce8_average_bandwidth(wm) <=
9243             (dce8_available_bandwidth(wm) / wm->num_heads))
9244                 return true;
9245         else
9246                 return false;
9247 }
9248
9249 /**
9250  * dce8_check_latency_hiding - check latency hiding
9251  *
9252  * @wm: watermark calculation data
9253  *
9254  * Check latency hiding (CIK).
9255  * Used for display watermark bandwidth calculations
9256  * Returns true if the display fits, false if not.
9257  */
9258 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9259 {
9260         u32 lb_partitions = wm->lb_size / wm->src_width;
9261         u32 line_time = wm->active_time + wm->blank_time;
9262         u32 latency_tolerant_lines;
9263         u32 latency_hiding;
9264         fixed20_12 a;
9265
9266         a.full = dfixed_const(1);
9267         if (wm->vsc.full > a.full)
9268                 latency_tolerant_lines = 1;
9269         else {
9270                 if (lb_partitions <= (wm->vtaps + 1))
9271                         latency_tolerant_lines = 1;
9272                 else
9273                         latency_tolerant_lines = 2;
9274         }
9275
9276         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9277
9278         if (dce8_latency_watermark(wm) <= latency_hiding)
9279                 return true;
9280         else
9281                 return false;
9282 }
9283
9284 /**
9285  * dce8_program_watermarks - program display watermarks
9286  *
9287  * @rdev: radeon_device pointer
9288  * @radeon_crtc: the selected display controller
9289  * @lb_size: line buffer size
9290  * @num_heads: number of display controllers in use
9291  *
9292  * Calculate and program the display watermarks for the
9293  * selected display controller (CIK).
9294  */
9295 static void dce8_program_watermarks(struct radeon_device *rdev,
9296                                     struct radeon_crtc *radeon_crtc,
9297                                     u32 lb_size, u32 num_heads)
9298 {
9299         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9300         struct dce8_wm_params wm_low, wm_high;
9301         u32 pixel_period;
9302         u32 line_time = 0;
9303         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9304         u32 tmp, wm_mask;
9305
9306         if (radeon_crtc->base.enabled && num_heads && mode) {
9307                 pixel_period = 1000000 / (u32)mode->clock;
9308                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9309
9310                 /* watermark for high clocks */
9311                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9312                     rdev->pm.dpm_enabled) {
9313                         wm_high.yclk =
9314                                 radeon_dpm_get_mclk(rdev, false) * 10;
9315                         wm_high.sclk =
9316                                 radeon_dpm_get_sclk(rdev, false) * 10;
9317                 } else {
9318                         wm_high.yclk = rdev->pm.current_mclk * 10;
9319                         wm_high.sclk = rdev->pm.current_sclk * 10;
9320                 }
9321
9322                 wm_high.disp_clk = mode->clock;
9323                 wm_high.src_width = mode->crtc_hdisplay;
9324                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9325                 wm_high.blank_time = line_time - wm_high.active_time;
9326                 wm_high.interlaced = false;
9327                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9328                         wm_high.interlaced = true;
9329                 wm_high.vsc = radeon_crtc->vsc;
9330                 wm_high.vtaps = 1;
9331                 if (radeon_crtc->rmx_type != RMX_OFF)
9332                         wm_high.vtaps = 2;
9333                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9334                 wm_high.lb_size = lb_size;
9335                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9336                 wm_high.num_heads = num_heads;
9337
9338                 /* set for high clocks */
9339                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9340
9341                 /* possibly force display priority to high */
9342                 /* should really do this at mode validation time... */
9343                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9344                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9345                     !dce8_check_latency_hiding(&wm_high) ||
9346                     (rdev->disp_priority == 2)) {
9347                         DRM_DEBUG_KMS("force priority to high\n");
9348                 }
9349
9350                 /* watermark for low clocks */
9351                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9352                     rdev->pm.dpm_enabled) {
9353                         wm_low.yclk =
9354                                 radeon_dpm_get_mclk(rdev, true) * 10;
9355                         wm_low.sclk =
9356                                 radeon_dpm_get_sclk(rdev, true) * 10;
9357                 } else {
9358                         wm_low.yclk = rdev->pm.current_mclk * 10;
9359                         wm_low.sclk = rdev->pm.current_sclk * 10;
9360                 }
9361
9362                 wm_low.disp_clk = mode->clock;
9363                 wm_low.src_width = mode->crtc_hdisplay;
9364                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9365                 wm_low.blank_time = line_time - wm_low.active_time;
9366                 wm_low.interlaced = false;
9367                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9368                         wm_low.interlaced = true;
9369                 wm_low.vsc = radeon_crtc->vsc;
9370                 wm_low.vtaps = 1;
9371                 if (radeon_crtc->rmx_type != RMX_OFF)
9372                         wm_low.vtaps = 2;
9373                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9374                 wm_low.lb_size = lb_size;
9375                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9376                 wm_low.num_heads = num_heads;
9377
9378                 /* set for low clocks */
9379                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9380
9381                 /* possibly force display priority to high */
9382                 /* should really do this at mode validation time... */
9383                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9384                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9385                     !dce8_check_latency_hiding(&wm_low) ||
9386                     (rdev->disp_priority == 2)) {
9387                         DRM_DEBUG_KMS("force priority to high\n");
9388                 }
9389         }
9390
9391         /* select wm A */
9392         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9393         tmp = wm_mask;
9394         tmp &= ~LATENCY_WATERMARK_MASK(3);
9395         tmp |= LATENCY_WATERMARK_MASK(1);
9396         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9397         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9398                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9399                 LATENCY_HIGH_WATERMARK(line_time)));
9400         /* select wm B */
9401         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9402         tmp &= ~LATENCY_WATERMARK_MASK(3);
9403         tmp |= LATENCY_WATERMARK_MASK(2);
9404         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9405         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9406                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9407                 LATENCY_HIGH_WATERMARK(line_time)));
9408         /* restore original selection */
9409         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9410
9411         /* save values for DPM */
9412         radeon_crtc->line_time = line_time;
9413         radeon_crtc->wm_high = latency_watermark_a;
9414         radeon_crtc->wm_low = latency_watermark_b;
9415 }
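/*
 * Timing note for the code above (illustrative numbers): mode->clock
 * is in kHz, so a 148500 kHz (1080p@60) pixel clock gives a
 * pixel_period of 1000000 / 148500 = 6 ns and, with crtc_htotal of
 * 2200, a line_time of 2200 * 6 = 13200 ns, well under the 65535 cap.
 * Watermark A is programmed from the high-clock parameters, watermark
 * B from the low-clock ones, and both are saved on the crtc for DPM.
 */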
9416
9417 /**
9418  * dce8_bandwidth_update - program display watermarks
9419  *
9420  * @rdev: radeon_device pointer
9421  *
9422  * Calculate and program the display watermarks and line
9423  * buffer allocation (CIK).
9424  */
9425 void dce8_bandwidth_update(struct radeon_device *rdev)
9426 {
9427         struct drm_display_mode *mode = NULL;
9428         u32 num_heads = 0, lb_size;
9429         int i;
9430
9431         if (!rdev->mode_info.mode_config_initialized)
9432                 return;
9433
9434         radeon_update_display_priority(rdev);
9435
9436         for (i = 0; i < rdev->num_crtc; i++) {
9437                 if (rdev->mode_info.crtcs[i]->base.enabled)
9438                         num_heads++;
9439         }
9440         for (i = 0; i < rdev->num_crtc; i++) {
9441                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9442                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9443                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9444         }
9445 }
9446
9447 /**
9448  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9449  *
9450  * @rdev: radeon_device pointer
9451  *
9452  * Fetches a GPU clock counter snapshot (CIK).
9453  * Returns the 64 bit clock counter snapshot.
9454  */
9455 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9456 {
9457         uint64_t clock;
9458
9459         mutex_lock(&rdev->gpu_clock_mutex);
9460         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9461         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9462                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9463         mutex_unlock(&rdev->gpu_clock_mutex);
9464         return clock;
9465 }
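/*
 * Minimal usage sketch (hypothetical caller, assuming the counter only
 * increases between the two reads): elapsed GPU clocks over an
 * interval can be measured as
 *
 *   u64 start = cik_get_gpu_clock_counter(rdev);
 *   ...workload...
 *   u64 ticks = cik_get_gpu_clock_counter(rdev) - start;
 */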
9466
9467 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9468                               u32 cntl_reg, u32 status_reg)
9469 {
9470         int r, i;
9471         struct atom_clock_dividers dividers;
9472         uint32_t tmp;
9473
9474         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475                                            clock, false, &dividers);
9476         if (r)
9477                 return r;
9478
9479         tmp = RREG32_SMC(cntl_reg);
9480         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9481         tmp |= dividers.post_divider;
9482         WREG32_SMC(cntl_reg, tmp);
9483
9484         for (i = 0; i < 100; i++) {
9485                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9486                         break;
9487                 mdelay(10);
9488         }
9489         if (i == 100)
9490                 return -ETIMEDOUT;
9491
9492         return 0;
9493 }
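/*
 * Note on the helper above: only the post divider from the
 * atom-computed dividers is programmed into the clock control
 * register; the status register is then polled every 10 ms for up to
 * 100 iterations (roughly one second) before giving up with
 * -ETIMEDOUT.
 */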
9494
9495 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9496 {
9497         int r = 0;
9498
9499         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9500         if (r)
9501                 return r;
9502
9503         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9504         return r;
9505 }
9506
9507 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9508 {
9509         int r, i;
9510         struct atom_clock_dividers dividers;
9511         u32 tmp;
9512
9513         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9514                                            ecclk, false, &dividers);
9515         if (r)
9516                 return r;
9517
9518         for (i = 0; i < 100; i++) {
9519                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9520                         break;
9521                 mdelay(10);
9522         }
9523         if (i == 100)
9524                 return -ETIMEDOUT;
9525
9526         tmp = RREG32_SMC(CG_ECLK_CNTL);
9527         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9528         tmp |= dividers.post_divider;
9529         WREG32_SMC(CG_ECLK_CNTL, tmp);
9530
9531         for (i = 0; i < 100; i++) {
9532                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9533                         break;
9534                 mdelay(10);
9535         }
9536         if (i == 100)
9537                 return -ETIMEDOUT;
9538
9539         return 0;
9540 }
9541
9542 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9543 {
9544         struct pci_dev *root = rdev->pdev->bus->self;
9545         int bridge_pos, gpu_pos;
9546         u32 speed_cntl, mask, current_data_rate;
9547         int ret, i;
9548         u16 tmp16;
9549
9550         if (pci_is_root_bus(rdev->pdev->bus))
9551                 return;
9552
9553         if (radeon_pcie_gen2 == 0)
9554                 return;
9555
9556         if (rdev->flags & RADEON_IS_IGP)
9557                 return;
9558
9559         if (!(rdev->flags & RADEON_IS_PCIE))
9560                 return;
9561
9562         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9563         if (ret != 0)
9564                 return;
9565
9566         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9567                 return;
9568
9569         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9570         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9571                 LC_CURRENT_DATA_RATE_SHIFT;
9572         if (mask & DRM_PCIE_SPEED_80) {
9573                 if (current_data_rate == 2) {
9574                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9575                         return;
9576                 }
9577                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9578         } else if (mask & DRM_PCIE_SPEED_50) {
9579                 if (current_data_rate == 1) {
9580                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9581                         return;
9582                 }
9583                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9584         }
9585
9586         bridge_pos = pci_pcie_cap(root);
9587         if (!bridge_pos)
9588                 return;
9589
9590         gpu_pos = pci_pcie_cap(rdev->pdev);
9591         if (!gpu_pos)
9592                 return;
9593
9594         if (mask & DRM_PCIE_SPEED_80) {
9595                 /* re-try equalization if gen3 is not already enabled */
9596                 if (current_data_rate != 2) {
9597                         u16 bridge_cfg, gpu_cfg;
9598                         u16 bridge_cfg2, gpu_cfg2;
9599                         u32 max_lw, current_lw, tmp;
9600
9601                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9602                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9603
9604                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9605                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9606
9607                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9608                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9609
9610                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9611                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9612                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9613
9614                         if (current_lw < max_lw) {
9615                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9616                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9617                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9618                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9619                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9620                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9621                                 }
9622                         }
9623
9624                         for (i = 0; i < 10; i++) {
9625                                 /* check status */
9626                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9627                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9628                                         break;
9629
9630                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9631                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9632
9633                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9634                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9635
9636                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9637                                 tmp |= LC_SET_QUIESCE;
9638                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9639
9640                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9641                                 tmp |= LC_REDO_EQ;
9642                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9643
9644                                 mdelay(100);
9645
9646                                 /* linkctl */
9647                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9648                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9649                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9650                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9651
9652                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9653                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9654                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9655                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9656
9657                                 /* linkctl2 */
9658                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9659                                 tmp16 &= ~((1 << 4) | (7 << 9));
9660                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9661                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9662
9663                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9664                                 tmp16 &= ~((1 << 4) | (7 << 9));
9665                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9666                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9667
9668                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9669                                 tmp &= ~LC_SET_QUIESCE;
9670                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9671                         }
9672                 }
9673         }
9674
9675         /* set the link speed */
9676         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9677         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9678         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9679
9680         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9681         tmp16 &= ~0xf;
9682         if (mask & DRM_PCIE_SPEED_80)
9683                 tmp16 |= 3; /* gen3 */
9684         else if (mask & DRM_PCIE_SPEED_50)
9685                 tmp16 |= 2; /* gen2 */
9686         else
9687                 tmp16 |= 1; /* gen1 */
9688         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9689
9690         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9691         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9692         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9693
9694         for (i = 0; i < rdev->usec_timeout; i++) {
9695                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9696                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9697                         break;
9698                 udelay(1);
9699         }
9700 }
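/*
 * Summary of the sequence above: after reading the link speed cap mask
 * for the slot, a gen3-capable link that is not already running at
 * gen3 goes through up to 10 equalization retries (LC_SET_QUIESCE plus
 * LC_REDO_EQ, with the bridge and GPU link control registers saved and
 * restored around each attempt); the target speed is then written into
 * the GPU's PCI_EXP_LNKCTL2 field (1 = gen1, 2 = gen2, 3 = gen3) and
 * the change is kicked off with LC_INITIATE_LINK_SPEED_CHANGE.
 */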
9701
9702 static void cik_program_aspm(struct radeon_device *rdev)
9703 {
9704         u32 data, orig;
9705         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9706         bool disable_clkreq = false;
9707
9708         if (radeon_aspm == 0)
9709                 return;
9710
9711         /* XXX double check IGPs */
9712         if (rdev->flags & RADEON_IS_IGP)
9713                 return;
9714
9715         if (!(rdev->flags & RADEON_IS_PCIE))
9716                 return;
9717
9718         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9719         data &= ~LC_XMIT_N_FTS_MASK;
9720         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9721         if (orig != data)
9722                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9723
9724         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9725         data |= LC_GO_TO_RECOVERY;
9726         if (orig != data)
9727                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9728
9729         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9730         data |= P_IGNORE_EDB_ERR;
9731         if (orig != data)
9732                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9733
9734         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9735         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9736         data |= LC_PMI_TO_L1_DIS;
9737         if (!disable_l0s)
9738                 data |= LC_L0S_INACTIVITY(7);
9739
9740         if (!disable_l1) {
9741                 data |= LC_L1_INACTIVITY(7);
9742                 data &= ~LC_PMI_TO_L1_DIS;
9743                 if (orig != data)
9744                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9745
9746                 if (!disable_plloff_in_l1) {
9747                         bool clk_req_support;
9748
9749                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9750                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9751                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9752                         if (orig != data)
9753                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9754
9755                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9756                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9757                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9758                         if (orig != data)
9759                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9760
9761                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9762                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9763                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9764                         if (orig != data)
9765                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9766
9767                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9768                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9769                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9770                         if (orig != data)
9771                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9772
9773                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9774                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9775                         data |= LC_DYN_LANES_PWR_STATE(3);
9776                         if (orig != data)
9777                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9778
9779                         if (!disable_clkreq &&
9780                             !pci_is_root_bus(rdev->pdev->bus)) {
9781                                 struct pci_dev *root = rdev->pdev->bus->self;
9782                                 u32 lnkcap;
9783
9784                                 clk_req_support = false;
9785                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9786                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9787                                         clk_req_support = true;
9788                         } else {
9789                                 clk_req_support = false;
9790                         }
9791
9792                         if (clk_req_support) {
9793                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9794                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9795                                 if (orig != data)
9796                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9797
9798                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9799                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9800                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9801                                 if (orig != data)
9802                                         WREG32_SMC(THM_CLK_CNTL, data);
9803
9804                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9805                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9806                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9807                                 if (orig != data)
9808                                         WREG32_SMC(MISC_CLK_CTRL, data);
9809
9810                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9811                                 data &= ~BCLK_AS_XCLK;
9812                                 if (orig != data)
9813                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9814
9815                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9816                                 data &= ~FORCE_BIF_REFCLK_EN;
9817                                 if (orig != data)
9818                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9819
9820                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9821                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9822                                 data |= MPLL_CLKOUT_SEL(4);
9823                                 if (orig != data)
9824                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9825                         }
9826                 }
9827         } else {
9828                 if (orig != data)
9829                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9830         }
9831
9832         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9833         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9834         if (orig != data)
9835                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9836
9837         if (!disable_l0s) {
9838                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9839                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9840                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9841                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9842                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9843                                 data &= ~LC_L0S_INACTIVITY_MASK;
9844                                 if (orig != data)
9845                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9846                         }
9847                 }
9848         }
9849 }