Merge tag 'drm-intel-fixes-2013-11-07' of git://people.freedesktop.org/~danvet/drm...
[linux-drm-fsl-dcu.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 static void cik_rlc_stop(struct radeon_device *rdev);
71 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
72 static void cik_program_aspm(struct radeon_device *rdev);
73 static void cik_init_pg(struct radeon_device *rdev);
74 static void cik_init_cg(struct radeon_device *rdev);
75 static void cik_fini_pg(struct radeon_device *rdev);
76 static void cik_fini_cg(struct radeon_device *rdev);
77 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
78                                           bool enable);
79
80 /* get temperature in millidegrees */
81 int ci_get_temp(struct radeon_device *rdev)
82 {
83         u32 temp;
84         int actual_temp = 0;
85
86         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
87                 CTF_TEMP_SHIFT;
88
89         if (temp & 0x200)
90                 actual_temp = 255;
91         else
92                 actual_temp = temp & 0x1ff;
93
94         actual_temp = actual_temp * 1000;
95
96         return actual_temp;
97 }
98
99 /* get temperature in millidegrees */
100 int kv_get_temp(struct radeon_device *rdev)
101 {
102         u32 temp;
103         int actual_temp = 0;
104
105         temp = RREG32_SMC(0xC0300E0C);
106
107         if (temp)
108                 actual_temp = (temp / 8) - 49;
109         else
110                 actual_temp = 0;
111
112         actual_temp = actual_temp * 1000;
113
114         return actual_temp;
115 }
116
117 /*
118  * Indirect registers accessor
 *
 * Reads one PCIE port register through the shared PCIE_INDEX/PCIE_DATA
 * window.  The index/data pair is a single piece of hardware state, so
 * the whole sequence runs under pciep_idx_lock with IRQs disabled.
 * Returns the 32-bit value read from PCIE_DATA.
119  */
120 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
121 {
122         unsigned long flags;
123         u32 r;
124
125         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
126         WREG32(PCIE_INDEX, reg); /* select the register to read */
127         (void)RREG32(PCIE_INDEX); /* read back index - presumably posts the write; TODO confirm */
128         r = RREG32(PCIE_DATA);
129         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
130         return r;
131 }
132
/*
 * Write one PCIE port register through the shared PCIE_INDEX/PCIE_DATA
 * window.  Serialized by pciep_idx_lock (IRQ-safe) because the
 * index/data pair is shared hardware state.
 */
133 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
134 {
135         unsigned long flags;
136
137         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
138         WREG32(PCIE_INDEX, reg); /* select the register to write */
139         (void)RREG32(PCIE_INDEX); /* read back index - presumably posts the write; TODO confirm */
140         WREG32(PCIE_DATA, v);
141         (void)RREG32(PCIE_DATA); /* read back data - presumably posts the write; TODO confirm */
142         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
143 }
144
145 static const u32 spectre_rlc_save_restore_register_list[] =
146 {
147         (0x0e00 << 16) | (0xc12c >> 2),
148         0x00000000,
149         (0x0e00 << 16) | (0xc140 >> 2),
150         0x00000000,
151         (0x0e00 << 16) | (0xc150 >> 2),
152         0x00000000,
153         (0x0e00 << 16) | (0xc15c >> 2),
154         0x00000000,
155         (0x0e00 << 16) | (0xc168 >> 2),
156         0x00000000,
157         (0x0e00 << 16) | (0xc170 >> 2),
158         0x00000000,
159         (0x0e00 << 16) | (0xc178 >> 2),
160         0x00000000,
161         (0x0e00 << 16) | (0xc204 >> 2),
162         0x00000000,
163         (0x0e00 << 16) | (0xc2b4 >> 2),
164         0x00000000,
165         (0x0e00 << 16) | (0xc2b8 >> 2),
166         0x00000000,
167         (0x0e00 << 16) | (0xc2bc >> 2),
168         0x00000000,
169         (0x0e00 << 16) | (0xc2c0 >> 2),
170         0x00000000,
171         (0x0e00 << 16) | (0x8228 >> 2),
172         0x00000000,
173         (0x0e00 << 16) | (0x829c >> 2),
174         0x00000000,
175         (0x0e00 << 16) | (0x869c >> 2),
176         0x00000000,
177         (0x0600 << 16) | (0x98f4 >> 2),
178         0x00000000,
179         (0x0e00 << 16) | (0x98f8 >> 2),
180         0x00000000,
181         (0x0e00 << 16) | (0x9900 >> 2),
182         0x00000000,
183         (0x0e00 << 16) | (0xc260 >> 2),
184         0x00000000,
185         (0x0e00 << 16) | (0x90e8 >> 2),
186         0x00000000,
187         (0x0e00 << 16) | (0x3c000 >> 2),
188         0x00000000,
189         (0x0e00 << 16) | (0x3c00c >> 2),
190         0x00000000,
191         (0x0e00 << 16) | (0x8c1c >> 2),
192         0x00000000,
193         (0x0e00 << 16) | (0x9700 >> 2),
194         0x00000000,
195         (0x0e00 << 16) | (0xcd20 >> 2),
196         0x00000000,
197         (0x4e00 << 16) | (0xcd20 >> 2),
198         0x00000000,
199         (0x5e00 << 16) | (0xcd20 >> 2),
200         0x00000000,
201         (0x6e00 << 16) | (0xcd20 >> 2),
202         0x00000000,
203         (0x7e00 << 16) | (0xcd20 >> 2),
204         0x00000000,
205         (0x8e00 << 16) | (0xcd20 >> 2),
206         0x00000000,
207         (0x9e00 << 16) | (0xcd20 >> 2),
208         0x00000000,
209         (0xae00 << 16) | (0xcd20 >> 2),
210         0x00000000,
211         (0xbe00 << 16) | (0xcd20 >> 2),
212         0x00000000,
213         (0x0e00 << 16) | (0x89bc >> 2),
214         0x00000000,
215         (0x0e00 << 16) | (0x8900 >> 2),
216         0x00000000,
217         0x3,
218         (0x0e00 << 16) | (0xc130 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc134 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc1fc >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc208 >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc264 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc268 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc26c >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc270 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0xc274 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0xc278 >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0xc27c >> 2),
239         0x00000000,
240         (0x0e00 << 16) | (0xc280 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0xc284 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0xc288 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc28c >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc290 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc294 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc298 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc29c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc2a0 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc2a4 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc2a8 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc2ac  >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2b0 >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0x301d0 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0x30238 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0x30250 >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0x30254 >> 2),
273         0x00000000,
274         (0x0e00 << 16) | (0x30258 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x3025c >> 2),
277         0x00000000,
278         (0x4e00 << 16) | (0xc900 >> 2),
279         0x00000000,
280         (0x5e00 << 16) | (0xc900 >> 2),
281         0x00000000,
282         (0x6e00 << 16) | (0xc900 >> 2),
283         0x00000000,
284         (0x7e00 << 16) | (0xc900 >> 2),
285         0x00000000,
286         (0x8e00 << 16) | (0xc900 >> 2),
287         0x00000000,
288         (0x9e00 << 16) | (0xc900 >> 2),
289         0x00000000,
290         (0xae00 << 16) | (0xc900 >> 2),
291         0x00000000,
292         (0xbe00 << 16) | (0xc900 >> 2),
293         0x00000000,
294         (0x4e00 << 16) | (0xc904 >> 2),
295         0x00000000,
296         (0x5e00 << 16) | (0xc904 >> 2),
297         0x00000000,
298         (0x6e00 << 16) | (0xc904 >> 2),
299         0x00000000,
300         (0x7e00 << 16) | (0xc904 >> 2),
301         0x00000000,
302         (0x8e00 << 16) | (0xc904 >> 2),
303         0x00000000,
304         (0x9e00 << 16) | (0xc904 >> 2),
305         0x00000000,
306         (0xae00 << 16) | (0xc904 >> 2),
307         0x00000000,
308         (0xbe00 << 16) | (0xc904 >> 2),
309         0x00000000,
310         (0x4e00 << 16) | (0xc908 >> 2),
311         0x00000000,
312         (0x5e00 << 16) | (0xc908 >> 2),
313         0x00000000,
314         (0x6e00 << 16) | (0xc908 >> 2),
315         0x00000000,
316         (0x7e00 << 16) | (0xc908 >> 2),
317         0x00000000,
318         (0x8e00 << 16) | (0xc908 >> 2),
319         0x00000000,
320         (0x9e00 << 16) | (0xc908 >> 2),
321         0x00000000,
322         (0xae00 << 16) | (0xc908 >> 2),
323         0x00000000,
324         (0xbe00 << 16) | (0xc908 >> 2),
325         0x00000000,
326         (0x4e00 << 16) | (0xc90c >> 2),
327         0x00000000,
328         (0x5e00 << 16) | (0xc90c >> 2),
329         0x00000000,
330         (0x6e00 << 16) | (0xc90c >> 2),
331         0x00000000,
332         (0x7e00 << 16) | (0xc90c >> 2),
333         0x00000000,
334         (0x8e00 << 16) | (0xc90c >> 2),
335         0x00000000,
336         (0x9e00 << 16) | (0xc90c >> 2),
337         0x00000000,
338         (0xae00 << 16) | (0xc90c >> 2),
339         0x00000000,
340         (0xbe00 << 16) | (0xc90c >> 2),
341         0x00000000,
342         (0x4e00 << 16) | (0xc910 >> 2),
343         0x00000000,
344         (0x5e00 << 16) | (0xc910 >> 2),
345         0x00000000,
346         (0x6e00 << 16) | (0xc910 >> 2),
347         0x00000000,
348         (0x7e00 << 16) | (0xc910 >> 2),
349         0x00000000,
350         (0x8e00 << 16) | (0xc910 >> 2),
351         0x00000000,
352         (0x9e00 << 16) | (0xc910 >> 2),
353         0x00000000,
354         (0xae00 << 16) | (0xc910 >> 2),
355         0x00000000,
356         (0xbe00 << 16) | (0xc910 >> 2),
357         0x00000000,
358         (0x0e00 << 16) | (0xc99c >> 2),
359         0x00000000,
360         (0x0e00 << 16) | (0x9834 >> 2),
361         0x00000000,
362         (0x0000 << 16) | (0x30f00 >> 2),
363         0x00000000,
364         (0x0001 << 16) | (0x30f00 >> 2),
365         0x00000000,
366         (0x0000 << 16) | (0x30f04 >> 2),
367         0x00000000,
368         (0x0001 << 16) | (0x30f04 >> 2),
369         0x00000000,
370         (0x0000 << 16) | (0x30f08 >> 2),
371         0x00000000,
372         (0x0001 << 16) | (0x30f08 >> 2),
373         0x00000000,
374         (0x0000 << 16) | (0x30f0c >> 2),
375         0x00000000,
376         (0x0001 << 16) | (0x30f0c >> 2),
377         0x00000000,
378         (0x0600 << 16) | (0x9b7c >> 2),
379         0x00000000,
380         (0x0e00 << 16) | (0x8a14 >> 2),
381         0x00000000,
382         (0x0e00 << 16) | (0x8a18 >> 2),
383         0x00000000,
384         (0x0600 << 16) | (0x30a00 >> 2),
385         0x00000000,
386         (0x0e00 << 16) | (0x8bf0 >> 2),
387         0x00000000,
388         (0x0e00 << 16) | (0x8bcc >> 2),
389         0x00000000,
390         (0x0e00 << 16) | (0x8b24 >> 2),
391         0x00000000,
392         (0x0e00 << 16) | (0x30a04 >> 2),
393         0x00000000,
394         (0x0600 << 16) | (0x30a10 >> 2),
395         0x00000000,
396         (0x0600 << 16) | (0x30a14 >> 2),
397         0x00000000,
398         (0x0600 << 16) | (0x30a18 >> 2),
399         0x00000000,
400         (0x0600 << 16) | (0x30a2c >> 2),
401         0x00000000,
402         (0x0e00 << 16) | (0xc700 >> 2),
403         0x00000000,
404         (0x0e00 << 16) | (0xc704 >> 2),
405         0x00000000,
406         (0x0e00 << 16) | (0xc708 >> 2),
407         0x00000000,
408         (0x0e00 << 16) | (0xc768 >> 2),
409         0x00000000,
410         (0x0400 << 16) | (0xc770 >> 2),
411         0x00000000,
412         (0x0400 << 16) | (0xc774 >> 2),
413         0x00000000,
414         (0x0400 << 16) | (0xc778 >> 2),
415         0x00000000,
416         (0x0400 << 16) | (0xc77c >> 2),
417         0x00000000,
418         (0x0400 << 16) | (0xc780 >> 2),
419         0x00000000,
420         (0x0400 << 16) | (0xc784 >> 2),
421         0x00000000,
422         (0x0400 << 16) | (0xc788 >> 2),
423         0x00000000,
424         (0x0400 << 16) | (0xc78c >> 2),
425         0x00000000,
426         (0x0400 << 16) | (0xc798 >> 2),
427         0x00000000,
428         (0x0400 << 16) | (0xc79c >> 2),
429         0x00000000,
430         (0x0400 << 16) | (0xc7a0 >> 2),
431         0x00000000,
432         (0x0400 << 16) | (0xc7a4 >> 2),
433         0x00000000,
434         (0x0400 << 16) | (0xc7a8 >> 2),
435         0x00000000,
436         (0x0400 << 16) | (0xc7ac >> 2),
437         0x00000000,
438         (0x0400 << 16) | (0xc7b0 >> 2),
439         0x00000000,
440         (0x0400 << 16) | (0xc7b4 >> 2),
441         0x00000000,
442         (0x0e00 << 16) | (0x9100 >> 2),
443         0x00000000,
444         (0x0e00 << 16) | (0x3c010 >> 2),
445         0x00000000,
446         (0x0e00 << 16) | (0x92a8 >> 2),
447         0x00000000,
448         (0x0e00 << 16) | (0x92ac >> 2),
449         0x00000000,
450         (0x0e00 << 16) | (0x92b4 >> 2),
451         0x00000000,
452         (0x0e00 << 16) | (0x92b8 >> 2),
453         0x00000000,
454         (0x0e00 << 16) | (0x92bc >> 2),
455         0x00000000,
456         (0x0e00 << 16) | (0x92c0 >> 2),
457         0x00000000,
458         (0x0e00 << 16) | (0x92c4 >> 2),
459         0x00000000,
460         (0x0e00 << 16) | (0x92c8 >> 2),
461         0x00000000,
462         (0x0e00 << 16) | (0x92cc >> 2),
463         0x00000000,
464         (0x0e00 << 16) | (0x92d0 >> 2),
465         0x00000000,
466         (0x0e00 << 16) | (0x8c00 >> 2),
467         0x00000000,
468         (0x0e00 << 16) | (0x8c04 >> 2),
469         0x00000000,
470         (0x0e00 << 16) | (0x8c20 >> 2),
471         0x00000000,
472         (0x0e00 << 16) | (0x8c38 >> 2),
473         0x00000000,
474         (0x0e00 << 16) | (0x8c3c >> 2),
475         0x00000000,
476         (0x0e00 << 16) | (0xae00 >> 2),
477         0x00000000,
478         (0x0e00 << 16) | (0x9604 >> 2),
479         0x00000000,
480         (0x0e00 << 16) | (0xac08 >> 2),
481         0x00000000,
482         (0x0e00 << 16) | (0xac0c >> 2),
483         0x00000000,
484         (0x0e00 << 16) | (0xac10 >> 2),
485         0x00000000,
486         (0x0e00 << 16) | (0xac14 >> 2),
487         0x00000000,
488         (0x0e00 << 16) | (0xac58 >> 2),
489         0x00000000,
490         (0x0e00 << 16) | (0xac68 >> 2),
491         0x00000000,
492         (0x0e00 << 16) | (0xac6c >> 2),
493         0x00000000,
494         (0x0e00 << 16) | (0xac70 >> 2),
495         0x00000000,
496         (0x0e00 << 16) | (0xac74 >> 2),
497         0x00000000,
498         (0x0e00 << 16) | (0xac78 >> 2),
499         0x00000000,
500         (0x0e00 << 16) | (0xac7c >> 2),
501         0x00000000,
502         (0x0e00 << 16) | (0xac80 >> 2),
503         0x00000000,
504         (0x0e00 << 16) | (0xac84 >> 2),
505         0x00000000,
506         (0x0e00 << 16) | (0xac88 >> 2),
507         0x00000000,
508         (0x0e00 << 16) | (0xac8c >> 2),
509         0x00000000,
510         (0x0e00 << 16) | (0x970c >> 2),
511         0x00000000,
512         (0x0e00 << 16) | (0x9714 >> 2),
513         0x00000000,
514         (0x0e00 << 16) | (0x9718 >> 2),
515         0x00000000,
516         (0x0e00 << 16) | (0x971c >> 2),
517         0x00000000,
518         (0x0e00 << 16) | (0x31068 >> 2),
519         0x00000000,
520         (0x4e00 << 16) | (0x31068 >> 2),
521         0x00000000,
522         (0x5e00 << 16) | (0x31068 >> 2),
523         0x00000000,
524         (0x6e00 << 16) | (0x31068 >> 2),
525         0x00000000,
526         (0x7e00 << 16) | (0x31068 >> 2),
527         0x00000000,
528         (0x8e00 << 16) | (0x31068 >> 2),
529         0x00000000,
530         (0x9e00 << 16) | (0x31068 >> 2),
531         0x00000000,
532         (0xae00 << 16) | (0x31068 >> 2),
533         0x00000000,
534         (0xbe00 << 16) | (0x31068 >> 2),
535         0x00000000,
536         (0x0e00 << 16) | (0xcd10 >> 2),
537         0x00000000,
538         (0x0e00 << 16) | (0xcd14 >> 2),
539         0x00000000,
540         (0x0e00 << 16) | (0x88b0 >> 2),
541         0x00000000,
542         (0x0e00 << 16) | (0x88b4 >> 2),
543         0x00000000,
544         (0x0e00 << 16) | (0x88b8 >> 2),
545         0x00000000,
546         (0x0e00 << 16) | (0x88bc >> 2),
547         0x00000000,
548         (0x0400 << 16) | (0x89c0 >> 2),
549         0x00000000,
550         (0x0e00 << 16) | (0x88c4 >> 2),
551         0x00000000,
552         (0x0e00 << 16) | (0x88c8 >> 2),
553         0x00000000,
554         (0x0e00 << 16) | (0x88d0 >> 2),
555         0x00000000,
556         (0x0e00 << 16) | (0x88d4 >> 2),
557         0x00000000,
558         (0x0e00 << 16) | (0x88d8 >> 2),
559         0x00000000,
560         (0x0e00 << 16) | (0x8980 >> 2),
561         0x00000000,
562         (0x0e00 << 16) | (0x30938 >> 2),
563         0x00000000,
564         (0x0e00 << 16) | (0x3093c >> 2),
565         0x00000000,
566         (0x0e00 << 16) | (0x30940 >> 2),
567         0x00000000,
568         (0x0e00 << 16) | (0x89a0 >> 2),
569         0x00000000,
570         (0x0e00 << 16) | (0x30900 >> 2),
571         0x00000000,
572         (0x0e00 << 16) | (0x30904 >> 2),
573         0x00000000,
574         (0x0e00 << 16) | (0x89b4 >> 2),
575         0x00000000,
576         (0x0e00 << 16) | (0x3c210 >> 2),
577         0x00000000,
578         (0x0e00 << 16) | (0x3c214 >> 2),
579         0x00000000,
580         (0x0e00 << 16) | (0x3c218 >> 2),
581         0x00000000,
582         (0x0e00 << 16) | (0x8904 >> 2),
583         0x00000000,
584         0x5,
585         (0x0e00 << 16) | (0x8c28 >> 2),
586         (0x0e00 << 16) | (0x8c2c >> 2),
587         (0x0e00 << 16) | (0x8c30 >> 2),
588         (0x0e00 << 16) | (0x8c34 >> 2),
589         (0x0e00 << 16) | (0x9600 >> 2),
590 };
591
592 static const u32 kalindi_rlc_save_restore_register_list[] =
593 {
594         (0x0e00 << 16) | (0xc12c >> 2),
595         0x00000000,
596         (0x0e00 << 16) | (0xc140 >> 2),
597         0x00000000,
598         (0x0e00 << 16) | (0xc150 >> 2),
599         0x00000000,
600         (0x0e00 << 16) | (0xc15c >> 2),
601         0x00000000,
602         (0x0e00 << 16) | (0xc168 >> 2),
603         0x00000000,
604         (0x0e00 << 16) | (0xc170 >> 2),
605         0x00000000,
606         (0x0e00 << 16) | (0xc204 >> 2),
607         0x00000000,
608         (0x0e00 << 16) | (0xc2b4 >> 2),
609         0x00000000,
610         (0x0e00 << 16) | (0xc2b8 >> 2),
611         0x00000000,
612         (0x0e00 << 16) | (0xc2bc >> 2),
613         0x00000000,
614         (0x0e00 << 16) | (0xc2c0 >> 2),
615         0x00000000,
616         (0x0e00 << 16) | (0x8228 >> 2),
617         0x00000000,
618         (0x0e00 << 16) | (0x829c >> 2),
619         0x00000000,
620         (0x0e00 << 16) | (0x869c >> 2),
621         0x00000000,
622         (0x0600 << 16) | (0x98f4 >> 2),
623         0x00000000,
624         (0x0e00 << 16) | (0x98f8 >> 2),
625         0x00000000,
626         (0x0e00 << 16) | (0x9900 >> 2),
627         0x00000000,
628         (0x0e00 << 16) | (0xc260 >> 2),
629         0x00000000,
630         (0x0e00 << 16) | (0x90e8 >> 2),
631         0x00000000,
632         (0x0e00 << 16) | (0x3c000 >> 2),
633         0x00000000,
634         (0x0e00 << 16) | (0x3c00c >> 2),
635         0x00000000,
636         (0x0e00 << 16) | (0x8c1c >> 2),
637         0x00000000,
638         (0x0e00 << 16) | (0x9700 >> 2),
639         0x00000000,
640         (0x0e00 << 16) | (0xcd20 >> 2),
641         0x00000000,
642         (0x4e00 << 16) | (0xcd20 >> 2),
643         0x00000000,
644         (0x5e00 << 16) | (0xcd20 >> 2),
645         0x00000000,
646         (0x6e00 << 16) | (0xcd20 >> 2),
647         0x00000000,
648         (0x7e00 << 16) | (0xcd20 >> 2),
649         0x00000000,
650         (0x0e00 << 16) | (0x89bc >> 2),
651         0x00000000,
652         (0x0e00 << 16) | (0x8900 >> 2),
653         0x00000000,
654         0x3,
655         (0x0e00 << 16) | (0xc130 >> 2),
656         0x00000000,
657         (0x0e00 << 16) | (0xc134 >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc1fc >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc208 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc264 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc268 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc26c >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc270 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc274 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc28c >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc290 >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc294 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0xc298 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0xc2a0 >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0xc2a4 >> 2),
684         0x00000000,
685         (0x0e00 << 16) | (0xc2a8 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0xc2ac >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x301d0 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0x30238 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x30250 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x30254 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x30258 >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x3025c >> 2),
700         0x00000000,
701         (0x4e00 << 16) | (0xc900 >> 2),
702         0x00000000,
703         (0x5e00 << 16) | (0xc900 >> 2),
704         0x00000000,
705         (0x6e00 << 16) | (0xc900 >> 2),
706         0x00000000,
707         (0x7e00 << 16) | (0xc900 >> 2),
708         0x00000000,
709         (0x4e00 << 16) | (0xc904 >> 2),
710         0x00000000,
711         (0x5e00 << 16) | (0xc904 >> 2),
712         0x00000000,
713         (0x6e00 << 16) | (0xc904 >> 2),
714         0x00000000,
715         (0x7e00 << 16) | (0xc904 >> 2),
716         0x00000000,
717         (0x4e00 << 16) | (0xc908 >> 2),
718         0x00000000,
719         (0x5e00 << 16) | (0xc908 >> 2),
720         0x00000000,
721         (0x6e00 << 16) | (0xc908 >> 2),
722         0x00000000,
723         (0x7e00 << 16) | (0xc908 >> 2),
724         0x00000000,
725         (0x4e00 << 16) | (0xc90c >> 2),
726         0x00000000,
727         (0x5e00 << 16) | (0xc90c >> 2),
728         0x00000000,
729         (0x6e00 << 16) | (0xc90c >> 2),
730         0x00000000,
731         (0x7e00 << 16) | (0xc90c >> 2),
732         0x00000000,
733         (0x4e00 << 16) | (0xc910 >> 2),
734         0x00000000,
735         (0x5e00 << 16) | (0xc910 >> 2),
736         0x00000000,
737         (0x6e00 << 16) | (0xc910 >> 2),
738         0x00000000,
739         (0x7e00 << 16) | (0xc910 >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0xc99c >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0x9834 >> 2),
744         0x00000000,
745         (0x0000 << 16) | (0x30f00 >> 2),
746         0x00000000,
747         (0x0000 << 16) | (0x30f04 >> 2),
748         0x00000000,
749         (0x0000 << 16) | (0x30f08 >> 2),
750         0x00000000,
751         (0x0000 << 16) | (0x30f0c >> 2),
752         0x00000000,
753         (0x0600 << 16) | (0x9b7c >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0x8a14 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x8a18 >> 2),
758         0x00000000,
759         (0x0600 << 16) | (0x30a00 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x8bf0 >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8bcc >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x8b24 >> 2),
766         0x00000000,
767         (0x0e00 << 16) | (0x30a04 >> 2),
768         0x00000000,
769         (0x0600 << 16) | (0x30a10 >> 2),
770         0x00000000,
771         (0x0600 << 16) | (0x30a14 >> 2),
772         0x00000000,
773         (0x0600 << 16) | (0x30a18 >> 2),
774         0x00000000,
775         (0x0600 << 16) | (0x30a2c >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0xc700 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc704 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc708 >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc768 >> 2),
784         0x00000000,
785         (0x0400 << 16) | (0xc770 >> 2),
786         0x00000000,
787         (0x0400 << 16) | (0xc774 >> 2),
788         0x00000000,
789         (0x0400 << 16) | (0xc798 >> 2),
790         0x00000000,
791         (0x0400 << 16) | (0xc79c >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0x9100 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0x3c010 >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0x8c00 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0x8c04 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0x8c20 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0x8c38 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0x8c3c >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xae00 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0x9604 >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0xac08 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0xac0c >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0xac10 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0xac14 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0xac58 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0xac68 >> 2),
822         0x00000000,
823         (0x0e00 << 16) | (0xac6c >> 2),
824         0x00000000,
825         (0x0e00 << 16) | (0xac70 >> 2),
826         0x00000000,
827         (0x0e00 << 16) | (0xac74 >> 2),
828         0x00000000,
829         (0x0e00 << 16) | (0xac78 >> 2),
830         0x00000000,
831         (0x0e00 << 16) | (0xac7c >> 2),
832         0x00000000,
833         (0x0e00 << 16) | (0xac80 >> 2),
834         0x00000000,
835         (0x0e00 << 16) | (0xac84 >> 2),
836         0x00000000,
837         (0x0e00 << 16) | (0xac88 >> 2),
838         0x00000000,
839         (0x0e00 << 16) | (0xac8c >> 2),
840         0x00000000,
841         (0x0e00 << 16) | (0x970c >> 2),
842         0x00000000,
843         (0x0e00 << 16) | (0x9714 >> 2),
844         0x00000000,
845         (0x0e00 << 16) | (0x9718 >> 2),
846         0x00000000,
847         (0x0e00 << 16) | (0x971c >> 2),
848         0x00000000,
849         (0x0e00 << 16) | (0x31068 >> 2),
850         0x00000000,
851         (0x4e00 << 16) | (0x31068 >> 2),
852         0x00000000,
853         (0x5e00 << 16) | (0x31068 >> 2),
854         0x00000000,
855         (0x6e00 << 16) | (0x31068 >> 2),
856         0x00000000,
857         (0x7e00 << 16) | (0x31068 >> 2),
858         0x00000000,
859         (0x0e00 << 16) | (0xcd10 >> 2),
860         0x00000000,
861         (0x0e00 << 16) | (0xcd14 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0x88b0 >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x88b4 >> 2),
866         0x00000000,
867         (0x0e00 << 16) | (0x88b8 >> 2),
868         0x00000000,
869         (0x0e00 << 16) | (0x88bc >> 2),
870         0x00000000,
871         (0x0400 << 16) | (0x89c0 >> 2),
872         0x00000000,
873         (0x0e00 << 16) | (0x88c4 >> 2),
874         0x00000000,
875         (0x0e00 << 16) | (0x88c8 >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x88d0 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x88d4 >> 2),
880         0x00000000,
881         (0x0e00 << 16) | (0x88d8 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8980 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x30938 >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x3093c >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30940 >> 2),
890         0x00000000,
891         (0x0e00 << 16) | (0x89a0 >> 2),
892         0x00000000,
893         (0x0e00 << 16) | (0x30900 >> 2),
894         0x00000000,
895         (0x0e00 << 16) | (0x30904 >> 2),
896         0x00000000,
897         (0x0e00 << 16) | (0x89b4 >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0x3e1fc >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0x3c210 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0x3c214 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0x3c218 >> 2),
906         0x00000000,
907         (0x0e00 << 16) | (0x8904 >> 2),
908         0x00000000,
909         0x5,
910         (0x0e00 << 16) | (0x8c28 >> 2),
911         (0x0e00 << 16) | (0x8c2c >> 2),
912         (0x0e00 << 16) | (0x8c30 >> 2),
913         (0x0e00 << 16) | (0x8c34 >> 2),
914         (0x0e00 << 16) | (0x9600 >> 2),
915 };
916
/* Bonaire SPM "golden" register fixups; rows are three dwords each,
 * consumed by radeon_program_register_sequence() in
 * cik_init_golden_registers() (presumably {offset, mask, value}).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
921
/* Bonaire common "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
929
/* Bonaire "golden" register fixups, applied once at init by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 * Values come from AMD's recommended register settings for the ASIC.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
974
/* Bonaire clock-gating init sequence (name suggests medium- and
 * coarse-grain clock gating — MGCG/CGCG), applied first by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1060
/* Spectre (Kaveri GFX) SPM "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1065
/* Spectre (Kaveri GFX) common "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1073
1074 static const u32 spectre_golden_registers[] =
1075 {
1076         0x3c000, 0xffff1fff, 0x96940200,
1077         0x3c00c, 0xffff0001, 0xff000000,
1078         0x3c200, 0xfffc0fff, 0x00000100,
1079         0x6ed8, 0x00010101, 0x00010000,
1080         0x9834, 0xf00fffff, 0x00000400,
1081         0x9838, 0xfffffffc, 0x00020200,
1082         0x5bb0, 0x000000f0, 0x00000070,
1083         0x5bc0, 0xf0311fff, 0x80300000,
1084         0x98f8, 0x73773777, 0x12010001,
1085         0x9b7c, 0x00ff0000, 0x00fc0000,
1086         0x2f48, 0x73773777, 0x12010001,
1087         0x8a14, 0xf000003f, 0x00000007,
1088         0x8b24, 0xffffffff, 0x00ffffff,
1089         0x28350, 0x3f3f3fff, 0x00000082,
1090         0x28355, 0x0000003f, 0x00000000,
1091         0x3e78, 0x00000001, 0x00000002,
1092         0x913c, 0xffff03df, 0x00000004,
1093         0xc768, 0x00000008, 0x00000008,
1094         0x8c00, 0x000008ff, 0x00000800,
1095         0x9508, 0x00010000, 0x00010000,
1096         0xac0c, 0xffffffff, 0x54763210,
1097         0x214f8, 0x01ff01ff, 0x00000002,
1098         0x21498, 0x007ff800, 0x00200000,
1099         0x2015c, 0xffffffff, 0x00000f40,
1100         0x30934, 0xffffffff, 0x00000001
1101 };
1102
/* Spectre (Kaveri GFX) clock-gating init sequence (MGCG/CGCG by
 * naming), applied first by cik_init_golden_registers() via
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1193
/* Kalindi (Kabini GFX) SPM "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1198
/* Kalindi (Kabini GFX) common "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1206
/* Kalindi (Kabini GFX) "golden" register fixups, applied by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1240
/* Kalindi (Kabini GFX) clock-gating init sequence (MGCG/CGCG by
 * naming), applied first by cik_init_golden_registers() via
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1299
1300 static void cik_init_golden_registers(struct radeon_device *rdev)
1301 {
1302         switch (rdev->family) {
1303         case CHIP_BONAIRE:
1304                 radeon_program_register_sequence(rdev,
1305                                                  bonaire_mgcg_cgcg_init,
1306                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1307                 radeon_program_register_sequence(rdev,
1308                                                  bonaire_golden_registers,
1309                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1310                 radeon_program_register_sequence(rdev,
1311                                                  bonaire_golden_common_registers,
1312                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1313                 radeon_program_register_sequence(rdev,
1314                                                  bonaire_golden_spm_registers,
1315                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1316                 break;
1317         case CHIP_KABINI:
1318                 radeon_program_register_sequence(rdev,
1319                                                  kalindi_mgcg_cgcg_init,
1320                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1321                 radeon_program_register_sequence(rdev,
1322                                                  kalindi_golden_registers,
1323                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1324                 radeon_program_register_sequence(rdev,
1325                                                  kalindi_golden_common_registers,
1326                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1327                 radeon_program_register_sequence(rdev,
1328                                                  kalindi_golden_spm_registers,
1329                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1330                 break;
1331         case CHIP_KAVERI:
1332                 radeon_program_register_sequence(rdev,
1333                                                  spectre_mgcg_cgcg_init,
1334                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1335                 radeon_program_register_sequence(rdev,
1336                                                  spectre_golden_registers,
1337                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1338                 radeon_program_register_sequence(rdev,
1339                                                  spectre_golden_common_registers,
1340                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1341                 radeon_program_register_sequence(rdev,
1342                                                  spectre_golden_spm_registers,
1343                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1344                 break;
1345         default:
1346                 break;
1347         }
1348 }
1349
1350 /**
1351  * cik_get_xclk - get the xclk
1352  *
1353  * @rdev: radeon_device pointer
1354  *
1355  * Returns the reference clock used by the gfx engine
1356  * (CIK).
1357  */
1358 u32 cik_get_xclk(struct radeon_device *rdev)
1359 {
1360         u32 reference_clock = rdev->clock.spll.reference_freq;
1361
1362         if (rdev->flags & RADEON_IS_IGP) {
1363                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1364                         return reference_clock / 2;
1365         } else {
1366                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1367                         return reference_clock / 4;
1368         }
1369         return reference_clock;
1370 }
1371
1372 /**
1373  * cik_mm_rdoorbell - read a doorbell dword
1374  *
1375  * @rdev: radeon_device pointer
1376  * @offset: byte offset into the aperture
1377  *
1378  * Returns the value in the doorbell aperture at the
1379  * requested offset (CIK).
1380  */
1381 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1382 {
1383         if (offset < rdev->doorbell.size) {
1384                 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1385         } else {
1386                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1387                 return 0;
1388         }
1389 }
1390
1391 /**
1392  * cik_mm_wdoorbell - write a doorbell dword
1393  *
1394  * @rdev: radeon_device pointer
1395  * @offset: byte offset into the aperture
1396  * @v: value to write
1397  *
1398  * Writes @v to the doorbell aperture at the
1399  * requested offset (CIK).
1400  */
1401 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1402 {
1403         if (offset < rdev->doorbell.size) {
1404                 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1405         } else {
1406                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1407         }
1408 }
1409
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug register setup: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode itself is loaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1451
1452 /**
1453  * cik_srbm_select - select specific register instances
1454  *
1455  * @rdev: radeon_device pointer
1456  * @me: selected ME (micro engine)
1457  * @pipe: pipe
1458  * @queue: queue
1459  * @vmid: VMID
1460  *
1461  * Switches the currently active registers instances.  Some
1462  * registers are instanced per VMID, others are instanced per
1463  * me/pipe/queue combination.
1464  */
1465 static void cik_srbm_select(struct radeon_device *rdev,
1466                             u32 me, u32 pipe, u32 queue, u32 vmid)
1467 {
1468         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1469                              MEID(me & 0x3) |
1470                              VMID(vmid & 0xf) |
1471                              QUEUEID(queue & 0x7));
1472         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1473 }
1474
1475 /* ucode loading */
1476 /**
1477  * ci_mc_load_microcode - load MC ucode into the hw
1478  *
1479  * @rdev: radeon_device pointer
1480  *
1481  * Load the GDDR MC ucode into the hw (CIK).
1482  * Returns 0 on success, error on failure.
1483  */
1484 static int ci_mc_load_microcode(struct radeon_device *rdev)
1485 {
1486         const __be32 *fw_data;
1487         u32 running, blackout = 0;
1488         u32 *io_mc_regs;
1489         int i, ucode_size, regs_size;
1490
1491         if (!rdev->mc_fw)
1492                 return -EINVAL;
1493
1494         switch (rdev->family) {
1495         case CHIP_BONAIRE:
1496         default:
1497                 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1498                 ucode_size = CIK_MC_UCODE_SIZE;
1499                 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1500                 break;
1501         }
1502
1503         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1504
1505         if (running == 0) {
1506                 if (running) {
1507                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1508                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1509                 }
1510
1511                 /* reset the engine and set to writable */
1512                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1513                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1514
1515                 /* load mc io regs */
1516                 for (i = 0; i < regs_size; i++) {
1517                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1518                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1519                 }
1520                 /* load the MC ucode */
1521                 fw_data = (const __be32 *)rdev->mc_fw->data;
1522                 for (i = 0; i < ucode_size; i++)
1523                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1524
1525                 /* put the engine back into the active state */
1526                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1527                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1528                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1529
1530                 /* wait for training to complete */
1531                 for (i = 0; i < rdev->usec_timeout; i++) {
1532                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1533                                 break;
1534                         udelay(1);
1535                 }
1536                 for (i = 0; i < rdev->usec_timeout; i++) {
1537                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1538                                 break;
1539                         udelay(1);
1540                 }
1541
1542                 if (running)
1543                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1544         }
1545
1546         return 0;
1547 }
1548
1549 /**
1550  * cik_init_microcode - load ucode images from disk
1551  *
1552  * @rdev: radeon_device pointer
1553  *
1554  * Use the firmware interface to load the ucode images into
1555  * the driver (not loaded into hw).
1556  * Returns 0 on success, error on failure.
1557  */
1558 static int cik_init_microcode(struct radeon_device *rdev)
1559 {
1560         const char *chip_name;
1561         size_t pfp_req_size, me_req_size, ce_req_size,
1562                 mec_req_size, rlc_req_size, mc_req_size,
1563                 sdma_req_size, smc_req_size;
1564         char fw_name[30];
1565         int err;
1566
1567         DRM_DEBUG("\n");
1568
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 chip_name = "BONAIRE";
1572                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1573                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1574                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1575                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1576                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1577                 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1578                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1579                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1580                 break;
1581         case CHIP_KAVERI:
1582                 chip_name = "KAVERI";
1583                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1584                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1585                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1586                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1587                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1588                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1589                 break;
1590         case CHIP_KABINI:
1591                 chip_name = "KABINI";
1592                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1593                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1594                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1595                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1596                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1597                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1598                 break;
1599         default: BUG();
1600         }
1601
1602         DRM_INFO("Loading %s Microcode\n", chip_name);
1603
1604         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1605         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1606         if (err)
1607                 goto out;
1608         if (rdev->pfp_fw->size != pfp_req_size) {
1609                 printk(KERN_ERR
1610                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1611                        rdev->pfp_fw->size, fw_name);
1612                 err = -EINVAL;
1613                 goto out;
1614         }
1615
1616         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1617         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1618         if (err)
1619                 goto out;
1620         if (rdev->me_fw->size != me_req_size) {
1621                 printk(KERN_ERR
1622                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1623                        rdev->me_fw->size, fw_name);
1624                 err = -EINVAL;
1625         }
1626
1627         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1628         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1629         if (err)
1630                 goto out;
1631         if (rdev->ce_fw->size != ce_req_size) {
1632                 printk(KERN_ERR
1633                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1634                        rdev->ce_fw->size, fw_name);
1635                 err = -EINVAL;
1636         }
1637
1638         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1639         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1640         if (err)
1641                 goto out;
1642         if (rdev->mec_fw->size != mec_req_size) {
1643                 printk(KERN_ERR
1644                        "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1645                        rdev->mec_fw->size, fw_name);
1646                 err = -EINVAL;
1647         }
1648
1649         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1650         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651         if (err)
1652                 goto out;
1653         if (rdev->rlc_fw->size != rlc_req_size) {
1654                 printk(KERN_ERR
1655                        "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1656                        rdev->rlc_fw->size, fw_name);
1657                 err = -EINVAL;
1658         }
1659
1660         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1661         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1662         if (err)
1663                 goto out;
1664         if (rdev->sdma_fw->size != sdma_req_size) {
1665                 printk(KERN_ERR
1666                        "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1667                        rdev->sdma_fw->size, fw_name);
1668                 err = -EINVAL;
1669         }
1670
1671         /* No SMC, MC ucode on APUs */
1672         if (!(rdev->flags & RADEON_IS_IGP)) {
1673                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1674                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1675                 if (err)
1676                         goto out;
1677                 if (rdev->mc_fw->size != mc_req_size) {
1678                         printk(KERN_ERR
1679                                "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1680                                rdev->mc_fw->size, fw_name);
1681                         err = -EINVAL;
1682                 }
1683
1684                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1685                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1686                 if (err) {
1687                         printk(KERN_ERR
1688                                "smc: error loading firmware \"%s\"\n",
1689                                fw_name);
1690                         release_firmware(rdev->smc_fw);
1691                         rdev->smc_fw = NULL;
1692                         err = 0;
1693                 } else if (rdev->smc_fw->size != smc_req_size) {
1694                         printk(KERN_ERR
1695                                "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1696                                rdev->smc_fw->size, fw_name);
1697                         err = -EINVAL;
1698                 }
1699         }
1700
1701 out:
1702         if (err) {
1703                 if (err != -EINVAL)
1704                         printk(KERN_ERR
1705                                "cik_cp: Failed to load firmware \"%s\"\n",
1706                                fw_name);
1707                 release_firmware(rdev->pfp_fw);
1708                 rdev->pfp_fw = NULL;
1709                 release_firmware(rdev->me_fw);
1710                 rdev->me_fw = NULL;
1711                 release_firmware(rdev->ce_fw);
1712                 rdev->ce_fw = NULL;
1713                 release_firmware(rdev->rlc_fw);
1714                 rdev->rlc_fw = NULL;
1715                 release_firmware(rdev->mc_fw);
1716                 rdev->mc_fw = NULL;
1717                 release_firmware(rdev->smc_fw);
1718                 rdev->smc_fw = NULL;
1719         }
1720         return err;
1721 }
1722
1723 /*
1724  * Core functions
1725  */
1726 /**
1727  * cik_tiling_mode_table_init - init the hw tiling table
1728  *
1729  * @rdev: radeon_device pointer
1730  *
1731  * Starting with SI, the tiling setup is done globally in a
1732  * set of 32 tiling modes.  Rather than selecting each set of
1733  * parameters per surface as on older asics, we just select
1734  * which index in the tiling table we want to use, and the
1735  * surface uses those parameters (CIK).
1736  */
1737 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1738 {
1739         const u32 num_tile_mode_states = 32;
1740         const u32 num_secondary_tile_mode_states = 16;
1741         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1742         u32 num_pipe_configs;
1743         u32 num_rbs = rdev->config.cik.max_backends_per_se *
1744                 rdev->config.cik.max_shader_engines;
1745
1746         switch (rdev->config.cik.mem_row_size_in_kb) {
1747         case 1:
1748                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1749                 break;
1750         case 2:
1751         default:
1752                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1753                 break;
1754         case 4:
1755                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1756                 break;
1757         }
1758
1759         num_pipe_configs = rdev->config.cik.max_tile_pipes;
1760         if (num_pipe_configs > 8)
1761                 num_pipe_configs = 8; /* ??? */
1762
1763         if (num_pipe_configs == 8) {
1764                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1765                         switch (reg_offset) {
1766                         case 0:
1767                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1768                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1769                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1770                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1771                                 break;
1772                         case 1:
1773                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1775                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1776                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1777                                 break;
1778                         case 2:
1779                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1781                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1782                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1783                                 break;
1784                         case 3:
1785                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1786                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1787                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1788                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1789                                 break;
1790                         case 4:
1791                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1792                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1793                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1794                                                  TILE_SPLIT(split_equal_to_row_size));
1795                                 break;
1796                         case 5:
1797                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1798                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1799                                 break;
1800                         case 6:
1801                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1802                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1803                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1804                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1805                                 break;
1806                         case 7:
1807                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1808                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1809                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1810                                                  TILE_SPLIT(split_equal_to_row_size));
1811                                 break;
1812                         case 8:
1813                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1814                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1815                                 break;
1816                         case 9:
1817                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1818                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1819                                 break;
1820                         case 10:
1821                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1822                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1823                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1824                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1825                                 break;
1826                         case 11:
1827                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1828                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1829                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1830                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1831                                 break;
1832                         case 12:
1833                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1834                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1835                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1836                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1837                                 break;
1838                         case 13:
1839                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1840                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1841                                 break;
1842                         case 14:
1843                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1844                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1845                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1846                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1847                                 break;
1848                         case 16:
1849                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1850                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1851                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1853                                 break;
1854                         case 17:
1855                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1856                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1857                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1858                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1859                                 break;
1860                         case 27:
1861                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1862                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1863                                 break;
1864                         case 28:
1865                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1866                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1867                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1868                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1869                                 break;
1870                         case 29:
1871                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1873                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1874                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1875                                 break;
1876                         case 30:
1877                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1878                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1879                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1880                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1881                                 break;
1882                         default:
1883                                 gb_tile_moden = 0;
1884                                 break;
1885                         }
1886                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1887                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1888                 }
1889                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1890                         switch (reg_offset) {
1891                         case 0:
1892                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1893                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1894                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1895                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1896                                 break;
1897                         case 1:
1898                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1899                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1900                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1901                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1902                                 break;
1903                         case 2:
1904                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1906                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1907                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1908                                 break;
1909                         case 3:
1910                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1911                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1912                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1913                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1914                                 break;
1915                         case 4:
1916                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1917                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1918                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1919                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1920                                 break;
1921                         case 5:
1922                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1923                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1924                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1925                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1926                                 break;
1927                         case 6:
1928                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1929                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1930                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1931                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1932                                 break;
1933                         case 8:
1934                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1936                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1937                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1938                                 break;
1939                         case 9:
1940                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1941                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1942                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1943                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1944                                 break;
1945                         case 10:
1946                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1947                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1948                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1949                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1950                                 break;
1951                         case 11:
1952                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1954                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1955                                                  NUM_BANKS(ADDR_SURF_16_BANK));
1956                                 break;
1957                         case 12:
1958                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1959                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1960                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1961                                                  NUM_BANKS(ADDR_SURF_8_BANK));
1962                                 break;
1963                         case 13:
1964                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1966                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1967                                                  NUM_BANKS(ADDR_SURF_4_BANK));
1968                                 break;
1969                         case 14:
1970                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1971                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1972                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1973                                                  NUM_BANKS(ADDR_SURF_2_BANK));
1974                                 break;
1975                         default:
1976                                 gb_tile_moden = 0;
1977                                 break;
1978                         }
1979                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1980                 }
1981         } else if (num_pipe_configs == 4) {
1982                 if (num_rbs == 4) {
1983                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1984                                 switch (reg_offset) {
1985                                 case 0:
1986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1988                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1989                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1990                                         break;
1991                                 case 1:
1992                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1993                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1994                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1995                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1996                                         break;
1997                                 case 2:
1998                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1999                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2000                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2001                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2002                                         break;
2003                                 case 3:
2004                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2005                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2006                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2007                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2008                                         break;
2009                                 case 4:
2010                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2011                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2012                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2013                                                          TILE_SPLIT(split_equal_to_row_size));
2014                                         break;
2015                                 case 5:
2016                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2017                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2018                                         break;
2019                                 case 6:
2020                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2021                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2022                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2023                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2024                                         break;
2025                                 case 7:
2026                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2028                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2029                                                          TILE_SPLIT(split_equal_to_row_size));
2030                                         break;
2031                                 case 8:
2032                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2033                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2034                                         break;
2035                                 case 9:
2036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2037                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2038                                         break;
2039                                 case 10:
2040                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2042                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2043                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2044                                         break;
2045                                 case 11:
2046                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2047                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2048                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2049                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2050                                         break;
2051                                 case 12:
2052                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2053                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2054                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2055                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056                                         break;
2057                                 case 13:
2058                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2059                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2060                                         break;
2061                                 case 14:
2062                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2063                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2064                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2065                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2066                                         break;
2067                                 case 16:
2068                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2069                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2070                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2071                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072                                         break;
2073                                 case 17:
2074                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2075                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2076                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2077                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078                                         break;
2079                                 case 27:
2080                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2081                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2082                                         break;
2083                                 case 28:
2084                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2085                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2086                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088                                         break;
2089                                 case 29:
2090                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2091                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2092                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2093                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                                         break;
2095                                 case 30:
2096                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2097                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2098                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2099                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2100                                         break;
2101                                 default:
2102                                         gb_tile_moden = 0;
2103                                         break;
2104                                 }
2105                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2106                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2107                         }
2108                 } else if (num_rbs < 4) {
2109                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2110                                 switch (reg_offset) {
2111                                 case 0:
2112                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2113                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2114                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2115                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2116                                         break;
2117                                 case 1:
2118                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2120                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2121                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2122                                         break;
2123                                 case 2:
2124                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2126                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2127                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2128                                         break;
2129                                 case 3:
2130                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2132                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2133                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2134                                         break;
2135                                 case 4:
2136                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2138                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2139                                                          TILE_SPLIT(split_equal_to_row_size));
2140                                         break;
2141                                 case 5:
2142                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2143                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144                                         break;
2145                                 case 6:
2146                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2147                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2148                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2149                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2150                                         break;
2151                                 case 7:
2152                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2153                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2154                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2155                                                          TILE_SPLIT(split_equal_to_row_size));
2156                                         break;
2157                                 case 8:
2158                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2159                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2160                                         break;
2161                                 case 9:
2162                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2163                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2164                                         break;
2165                                 case 10:
2166                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2168                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2169                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170                                         break;
2171                                 case 11:
2172                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2174                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2175                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176                                         break;
2177                                 case 12:
2178                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2179                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2181                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                                         break;
2183                                 case 13:
2184                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2185                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2186                                         break;
2187                                 case 14:
2188                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2190                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2191                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2192                                         break;
2193                                 case 16:
2194                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2195                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2197                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                                         break;
2199                                 case 17:
2200                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2201                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2203                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                                         break;
2205                                 case 27:
2206                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2207                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2208                                         break;
2209                                 case 28:
2210                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2211                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2212                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2213                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2214                                         break;
2215                                 case 29:
2216                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2218                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2219                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2220                                         break;
2221                                 case 30:
2222                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2223                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2224                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2225                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2226                                         break;
2227                                 default:
2228                                         gb_tile_moden = 0;
2229                                         break;
2230                                 }
2231                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2232                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2233                         }
2234                 }
2235                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2236                         switch (reg_offset) {
2237                         case 0:
2238                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2239                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2240                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2241                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2242                                 break;
2243                         case 1:
2244                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2245                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2246                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2247                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2248                                 break;
2249                         case 2:
2250                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2251                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2252                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2254                                 break;
2255                         case 3:
2256                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2257                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2258                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2260                                 break;
2261                         case 4:
2262                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2266                                 break;
2267                         case 5:
2268                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2270                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2271                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2272                                 break;
2273                         case 6:
2274                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2277                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2278                                 break;
2279                         case 8:
2280                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2281                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2282                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2283                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2284                                 break;
2285                         case 9:
2286                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2288                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2290                                 break;
2291                         case 10:
2292                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2293                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2296                                 break;
2297                         case 11:
2298                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2302                                 break;
2303                         case 12:
2304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2308                                 break;
2309                         case 13:
2310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2313                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2314                                 break;
2315                         case 14:
2316                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2319                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2320                                 break;
2321                         default:
2322                                 gb_tile_moden = 0;
2323                                 break;
2324                         }
2325                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2326                 }
2327         } else if (num_pipe_configs == 2) {
2328                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2329                         switch (reg_offset) {
2330                         case 0:
2331                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2333                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2334                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2335                                 break;
2336                         case 1:
2337                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2339                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2340                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2341                                 break;
2342                         case 2:
2343                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2345                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2346                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2347                                 break;
2348                         case 3:
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2353                                 break;
2354                         case 4:
2355                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2358                                                  TILE_SPLIT(split_equal_to_row_size));
2359                                 break;
2360                         case 5:
2361                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2362                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2363                                 break;
2364                         case 6:
2365                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2366                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2368                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369                                 break;
2370                         case 7:
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2374                                                  TILE_SPLIT(split_equal_to_row_size));
2375                                 break;
2376                         case 8:
2377                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2378                                 break;
2379                         case 9:
2380                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2382                                 break;
2383                         case 10:
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2386                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2387                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388                                 break;
2389                         case 11:
2390                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2392                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2393                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394                                 break;
2395                         case 12:
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2397                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2399                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400                                 break;
2401                         case 13:
2402                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2404                                 break;
2405                         case 14:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2409                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                 break;
2411                         case 16:
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2415                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                                 break;
2417                         case 17:
2418                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2421                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2422                                 break;
2423                         case 27:
2424                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2426                                 break;
2427                         case 28:
2428                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2431                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                                 break;
2433                         case 29:
2434                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2437                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                                 break;
2439                         case 30:
2440                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                                                  PIPE_CONFIG(ADDR_SURF_P2) |
2443                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                                 break;
2445                         default:
2446                                 gb_tile_moden = 0;
2447                                 break;
2448                         }
2449                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2450                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2451                 }
2452                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2453                         switch (reg_offset) {
2454                         case 0:
2455                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2456                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2458                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2459                                 break;
2460                         case 1:
2461                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2462                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2463                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2464                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2465                                 break;
2466                         case 2:
2467                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2469                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2470                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2471                                 break;
2472                         case 3:
2473                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2476                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2477                                 break;
2478                         case 4:
2479                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2482                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2483                                 break;
2484                         case 5:
2485                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2488                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2489                                 break;
2490                         case 6:
2491                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2495                                 break;
2496                         case 8:
2497                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2498                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2499                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2500                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2501                                 break;
2502                         case 9:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2507                                 break;
2508                         case 10:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2513                                 break;
2514                         case 11:
2515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2519                                 break;
2520                         case 12:
2521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2525                                 break;
2526                         case 13:
2527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2530                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2531                                 break;
2532                         case 14:
2533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2536                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2537                                 break;
2538                         default:
2539                                 gb_tile_moden = 0;
2540                                 break;
2541                         }
2542                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2543                 }
2544         } else
2545                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2546 }
2547
2548 /**
2549  * cik_select_se_sh - select which SE, SH to address
2550  *
2551  * @rdev: radeon_device pointer
2552  * @se_num: shader engine to address
2553  * @sh_num: sh block to address
2554  *
2555  * Select which SE, SH combinations to address. Certain
2556  * registers are instanced per SE or SH.  0xffffffff means
2557  * broadcast to all SEs or SHs (CIK).
2558  */
2559 static void cik_select_se_sh(struct radeon_device *rdev,
2560                              u32 se_num, u32 sh_num)
2561 {
2562         u32 data = INSTANCE_BROADCAST_WRITES;
2563
2564         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2565                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2566         else if (se_num == 0xffffffff)
2567                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2568         else if (sh_num == 0xffffffff)
2569                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2570         else
2571                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2572         WREG32(GRBM_GFX_INDEX, data);
2573 }
2574
2575 /**
2576  * cik_create_bitmask - create a bitmask
2577  *
2578  * @bit_width: length of the mask
2579  *
2580  * create a variable length bit mask (CIK).
2581  * Returns the bitmask.
2582  */
2583 static u32 cik_create_bitmask(u32 bit_width)
2584 {
2585         u32 i, mask = 0;
2586
2587         for (i = 0; i < bit_width; i++) {
2588                 mask <<= 1;
2589                 mask |= 1;
2590         }
2591         return mask;
2592 }
2593
2594 /**
2595  * cik_select_se_sh - select which SE, SH to address
2596  *
2597  * @rdev: radeon_device pointer
2598  * @max_rb_num: max RBs (render backends) for the asic
2599  * @se_num: number of SEs (shader engines) for the asic
2600  * @sh_per_se: number of SH blocks per SE for the asic
2601  *
2602  * Calculates the bitmask of disabled RBs (CIK).
2603  * Returns the disabled RB bitmask.
2604  */
2605 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2606                               u32 max_rb_num, u32 se_num,
2607                               u32 sh_per_se)
2608 {
2609         u32 data, mask;
2610
2611         data = RREG32(CC_RB_BACKEND_DISABLE);
2612         if (data & 1)
2613                 data &= BACKEND_DISABLE_MASK;
2614         else
2615                 data = 0;
2616         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2617
2618         data >>= BACKEND_DISABLE_SHIFT;
2619
2620         mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2621
2622         return data & mask;
2623 }
2624
2625 /**
2626  * cik_setup_rb - setup the RBs on the asic
2627  *
2628  * @rdev: radeon_device pointer
2629  * @se_num: number of SEs (shader engines) for the asic
2630  * @sh_per_se: number of SH blocks per SE for the asic
2631  * @max_rb_num: max RBs (render backends) for the asic
2632  *
2633  * Configures per-SE/SH RB registers (CIK).
2634  */
2635 static void cik_setup_rb(struct radeon_device *rdev,
2636                          u32 se_num, u32 sh_per_se,
2637                          u32 max_rb_num)
2638 {
2639         int i, j;
2640         u32 data, mask;
2641         u32 disabled_rbs = 0;
2642         u32 enabled_rbs = 0;
2643
2644         for (i = 0; i < se_num; i++) {
2645                 for (j = 0; j < sh_per_se; j++) {
2646                         cik_select_se_sh(rdev, i, j);
2647                         data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2648                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2649                 }
2650         }
2651         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2652
2653         mask = 1;
2654         for (i = 0; i < max_rb_num; i++) {
2655                 if (!(disabled_rbs & mask))
2656                         enabled_rbs |= mask;
2657                 mask <<= 1;
2658         }
2659
2660         for (i = 0; i < se_num; i++) {
2661                 cik_select_se_sh(rdev, i, 0xffffffff);
2662                 data = 0;
2663                 for (j = 0; j < sh_per_se; j++) {
2664                         switch (enabled_rbs & 3) {
2665                         case 1:
2666                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2667                                 break;
2668                         case 2:
2669                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2670                                 break;
2671                         case 3:
2672                         default:
2673                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2674                                 break;
2675                         }
2676                         enabled_rbs >>= 2;
2677                 }
2678                 WREG32(PA_SC_RASTER_CONFIG, data);
2679         }
2680         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2681 }
2682
2683 /**
2684  * cik_gpu_init - setup the 3D engine
2685  *
2686  * @rdev: radeon_device pointer
2687  *
2688  * Configures the 3D engine and tiling configuration
2689  * registers so that the 3D engine is usable.
2690  */
2691 static void cik_gpu_init(struct radeon_device *rdev)
2692 {
2693         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2694         u32 mc_shared_chmap, mc_arb_ramcfg;
2695         u32 hdp_host_path_cntl;
2696         u32 tmp;
2697         int i, j;
2698
2699         switch (rdev->family) {
2700         case CHIP_BONAIRE:
2701                 rdev->config.cik.max_shader_engines = 2;
2702                 rdev->config.cik.max_tile_pipes = 4;
2703                 rdev->config.cik.max_cu_per_sh = 7;
2704                 rdev->config.cik.max_sh_per_se = 1;
2705                 rdev->config.cik.max_backends_per_se = 2;
2706                 rdev->config.cik.max_texture_channel_caches = 4;
2707                 rdev->config.cik.max_gprs = 256;
2708                 rdev->config.cik.max_gs_threads = 32;
2709                 rdev->config.cik.max_hw_contexts = 8;
2710
2711                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2712                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2713                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2714                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2715                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2716                 break;
2717         case CHIP_KAVERI:
2718                 rdev->config.cik.max_shader_engines = 1;
2719                 rdev->config.cik.max_tile_pipes = 4;
2720                 if ((rdev->pdev->device == 0x1304) ||
2721                     (rdev->pdev->device == 0x1305) ||
2722                     (rdev->pdev->device == 0x130C) ||
2723                     (rdev->pdev->device == 0x130F) ||
2724                     (rdev->pdev->device == 0x1310) ||
2725                     (rdev->pdev->device == 0x1311) ||
2726                     (rdev->pdev->device == 0x131C)) {
2727                         rdev->config.cik.max_cu_per_sh = 8;
2728                         rdev->config.cik.max_backends_per_se = 2;
2729                 } else if ((rdev->pdev->device == 0x1309) ||
2730                            (rdev->pdev->device == 0x130A) ||
2731                            (rdev->pdev->device == 0x130D) ||
2732                            (rdev->pdev->device == 0x1313) ||
2733                            (rdev->pdev->device == 0x131D)) {
2734                         rdev->config.cik.max_cu_per_sh = 6;
2735                         rdev->config.cik.max_backends_per_se = 2;
2736                 } else if ((rdev->pdev->device == 0x1306) ||
2737                            (rdev->pdev->device == 0x1307) ||
2738                            (rdev->pdev->device == 0x130B) ||
2739                            (rdev->pdev->device == 0x130E) ||
2740                            (rdev->pdev->device == 0x1315) ||
2741                            (rdev->pdev->device == 0x131B)) {
2742                         rdev->config.cik.max_cu_per_sh = 4;
2743                         rdev->config.cik.max_backends_per_se = 1;
2744                 } else {
2745                         rdev->config.cik.max_cu_per_sh = 3;
2746                         rdev->config.cik.max_backends_per_se = 1;
2747                 }
2748                 rdev->config.cik.max_sh_per_se = 1;
2749                 rdev->config.cik.max_texture_channel_caches = 4;
2750                 rdev->config.cik.max_gprs = 256;
2751                 rdev->config.cik.max_gs_threads = 16;
2752                 rdev->config.cik.max_hw_contexts = 8;
2753
2754                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2755                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2756                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2757                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2758                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2759                 break;
2760         case CHIP_KABINI:
2761         default:
2762                 rdev->config.cik.max_shader_engines = 1;
2763                 rdev->config.cik.max_tile_pipes = 2;
2764                 rdev->config.cik.max_cu_per_sh = 2;
2765                 rdev->config.cik.max_sh_per_se = 1;
2766                 rdev->config.cik.max_backends_per_se = 1;
2767                 rdev->config.cik.max_texture_channel_caches = 2;
2768                 rdev->config.cik.max_gprs = 256;
2769                 rdev->config.cik.max_gs_threads = 16;
2770                 rdev->config.cik.max_hw_contexts = 8;
2771
2772                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2773                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2774                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2775                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2776                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2777                 break;
2778         }
2779
2780         /* Initialize HDP */
2781         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2782                 WREG32((0x2c14 + j), 0x00000000);
2783                 WREG32((0x2c18 + j), 0x00000000);
2784                 WREG32((0x2c1c + j), 0x00000000);
2785                 WREG32((0x2c20 + j), 0x00000000);
2786                 WREG32((0x2c24 + j), 0x00000000);
2787         }
2788
2789         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2790
2791         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2792
2793         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2794         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2795
2796         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2797         rdev->config.cik.mem_max_burst_length_bytes = 256;
2798         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2799         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2800         if (rdev->config.cik.mem_row_size_in_kb > 4)
2801                 rdev->config.cik.mem_row_size_in_kb = 4;
2802         /* XXX use MC settings? */
2803         rdev->config.cik.shader_engine_tile_size = 32;
2804         rdev->config.cik.num_gpus = 1;
2805         rdev->config.cik.multi_gpu_tile_size = 64;
2806
2807         /* fix up row size */
2808         gb_addr_config &= ~ROW_SIZE_MASK;
2809         switch (rdev->config.cik.mem_row_size_in_kb) {
2810         case 1:
2811         default:
2812                 gb_addr_config |= ROW_SIZE(0);
2813                 break;
2814         case 2:
2815                 gb_addr_config |= ROW_SIZE(1);
2816                 break;
2817         case 4:
2818                 gb_addr_config |= ROW_SIZE(2);
2819                 break;
2820         }
2821
2822         /* setup tiling info dword.  gb_addr_config is not adequate since it does
2823          * not have bank info, so create a custom tiling dword.
2824          * bits 3:0   num_pipes
2825          * bits 7:4   num_banks
2826          * bits 11:8  group_size
2827          * bits 15:12 row_size
2828          */
2829         rdev->config.cik.tile_config = 0;
2830         switch (rdev->config.cik.num_tile_pipes) {
2831         case 1:
2832                 rdev->config.cik.tile_config |= (0 << 0);
2833                 break;
2834         case 2:
2835                 rdev->config.cik.tile_config |= (1 << 0);
2836                 break;
2837         case 4:
2838                 rdev->config.cik.tile_config |= (2 << 0);
2839                 break;
2840         case 8:
2841         default:
2842                 /* XXX what about 12? */
2843                 rdev->config.cik.tile_config |= (3 << 0);
2844                 break;
2845         }
2846         rdev->config.cik.tile_config |=
2847                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
2848         rdev->config.cik.tile_config |=
2849                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2850         rdev->config.cik.tile_config |=
2851                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2852
2853         WREG32(GB_ADDR_CONFIG, gb_addr_config);
2854         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2855         WREG32(DMIF_ADDR_CALC, gb_addr_config);
2856         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2857         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2858         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2859         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2860         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2861
2862         cik_tiling_mode_table_init(rdev);
2863
2864         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2865                      rdev->config.cik.max_sh_per_se,
2866                      rdev->config.cik.max_backends_per_se);
2867
2868         /* set HW defaults for 3D engine */
2869         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2870
2871         WREG32(SX_DEBUG_1, 0x20);
2872
2873         WREG32(TA_CNTL_AUX, 0x00010000);
2874
2875         tmp = RREG32(SPI_CONFIG_CNTL);
2876         tmp |= 0x03000000;
2877         WREG32(SPI_CONFIG_CNTL, tmp);
2878
2879         WREG32(SQ_CONFIG, 1);
2880
2881         WREG32(DB_DEBUG, 0);
2882
2883         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2884         tmp |= 0x00000400;
2885         WREG32(DB_DEBUG2, tmp);
2886
2887         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2888         tmp |= 0x00020200;
2889         WREG32(DB_DEBUG3, tmp);
2890
2891         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2892         tmp |= 0x00018208;
2893         WREG32(CB_HW_CONTROL, tmp);
2894
2895         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2896
2897         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2898                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2899                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2900                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2901
2902         WREG32(VGT_NUM_INSTANCES, 1);
2903
2904         WREG32(CP_PERFMON_CNTL, 0);
2905
2906         WREG32(SQ_CONFIG, 0);
2907
2908         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2909                                           FORCE_EOV_MAX_REZ_CNT(255)));
2910
2911         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2912                AUTO_INVLD_EN(ES_AND_GS_AUTO));
2913
2914         WREG32(VGT_GS_VERTEX_REUSE, 16);
2915         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2916
2917         tmp = RREG32(HDP_MISC_CNTL);
2918         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2919         WREG32(HDP_MISC_CNTL, tmp);
2920
2921         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2922         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2923
2924         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2925         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2926
2927         udelay(50);
2928 }
2929
2930 /*
2931  * GPU scratch registers helpers function.
2932  */
2933 /**
2934  * cik_scratch_init - setup driver info for CP scratch regs
2935  *
2936  * @rdev: radeon_device pointer
2937  *
2938  * Set up the number and offset of the CP scratch registers.
2939  * NOTE: use of CP scratch registers is a legacy inferface and
2940  * is not used by default on newer asics (r6xx+).  On newer asics,
2941  * memory buffers are used for fences rather than scratch regs.
2942  */
2943 static void cik_scratch_init(struct radeon_device *rdev)
2944 {
2945         int i;
2946
2947         rdev->scratch.num_reg = 7;
2948         rdev->scratch.reg_base = SCRATCH_REG0;
2949         for (i = 0; i < rdev->scratch.num_reg; i++) {
2950                 rdev->scratch.free[i] = true;
2951                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2952         }
2953 }
2954
2955 /**
2956  * cik_ring_test - basic gfx ring test
2957  *
2958  * @rdev: radeon_device pointer
2959  * @ring: radeon_ring structure holding ring information
2960  *
2961  * Allocate a scratch register and write to it using the gfx ring (CIK).
2962  * Provides a basic gfx ring test to verify that the ring is working.
2963  * Used by cik_cp_gfx_resume();
2964  * Returns 0 on success, error on failure.
2965  */
2966 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2967 {
2968         uint32_t scratch;
2969         uint32_t tmp = 0;
2970         unsigned i;
2971         int r;
2972
2973         r = radeon_scratch_get(rdev, &scratch);
2974         if (r) {
2975                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2976                 return r;
2977         }
2978         WREG32(scratch, 0xCAFEDEAD);
2979         r = radeon_ring_lock(rdev, ring, 3);
2980         if (r) {
2981                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2982                 radeon_scratch_free(rdev, scratch);
2983                 return r;
2984         }
2985         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2986         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2987         radeon_ring_write(ring, 0xDEADBEEF);
2988         radeon_ring_unlock_commit(rdev, ring);
2989
2990         for (i = 0; i < rdev->usec_timeout; i++) {
2991                 tmp = RREG32(scratch);
2992                 if (tmp == 0xDEADBEEF)
2993                         break;
2994                 DRM_UDELAY(1);
2995         }
2996         if (i < rdev->usec_timeout) {
2997                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2998         } else {
2999                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3000                           ring->idx, scratch, tmp);
3001                 r = -EINVAL;
3002         }
3003         radeon_scratch_free(rdev, scratch);
3004         return r;
3005 }
3006
3007 /**
3008  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3009  *
3010  * @rdev: radeon_device pointer
3011  * @fence: radeon fence object
3012  *
3013  * Emits a fence sequnce number on the gfx ring and flushes
3014  * GPU caches.
3015  */
3016 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3017                              struct radeon_fence *fence)
3018 {
3019         struct radeon_ring *ring = &rdev->ring[fence->ring];
3020         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3021
3022         /* EVENT_WRITE_EOP - flush caches, send int */
3023         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3024         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3025                                  EOP_TC_ACTION_EN |
3026                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3027                                  EVENT_INDEX(5)));
3028         radeon_ring_write(ring, addr & 0xfffffffc);
3029         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3030         radeon_ring_write(ring, fence->seq);
3031         radeon_ring_write(ring, 0);
3032         /* HDP flush */
3033         /* We should be using the new WAIT_REG_MEM special op packet here
3034          * but it causes the CP to hang
3035          */
3036         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3037         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3038                                  WRITE_DATA_DST_SEL(0)));
3039         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3040         radeon_ring_write(ring, 0);
3041         radeon_ring_write(ring, 0);
3042 }
3043
3044 /**
3045  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3046  *
3047  * @rdev: radeon_device pointer
3048  * @fence: radeon fence object
3049  *
3050  * Emits a fence sequnce number on the compute ring and flushes
3051  * GPU caches.
3052  */
3053 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3054                                  struct radeon_fence *fence)
3055 {
3056         struct radeon_ring *ring = &rdev->ring[fence->ring];
3057         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3058
3059         /* RELEASE_MEM - flush caches, send int */
3060         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3061         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3062                                  EOP_TC_ACTION_EN |
3063                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3064                                  EVENT_INDEX(5)));
3065         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3066         radeon_ring_write(ring, addr & 0xfffffffc);
3067         radeon_ring_write(ring, upper_32_bits(addr));
3068         radeon_ring_write(ring, fence->seq);
3069         radeon_ring_write(ring, 0);
3070         /* HDP flush */
3071         /* We should be using the new WAIT_REG_MEM special op packet here
3072          * but it causes the CP to hang
3073          */
3074         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3075         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3076                                  WRITE_DATA_DST_SEL(0)));
3077         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3078         radeon_ring_write(ring, 0);
3079         radeon_ring_write(ring, 0);
3080 }
3081
3082 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3083                              struct radeon_ring *ring,
3084                              struct radeon_semaphore *semaphore,
3085                              bool emit_wait)
3086 {
3087         uint64_t addr = semaphore->gpu_addr;
3088         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3089
3090         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3091         radeon_ring_write(ring, addr & 0xffffffff);
3092         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3093 }
3094
3095 /**
3096  * cik_copy_cpdma - copy pages using the CP DMA engine
3097  *
3098  * @rdev: radeon_device pointer
3099  * @src_offset: src GPU address
3100  * @dst_offset: dst GPU address
3101  * @num_gpu_pages: number of GPU pages to xfer
3102  * @fence: radeon fence object
3103  *
3104  * Copy GPU paging using the CP DMA engine (CIK+).
3105  * Used by the radeon ttm implementation to move pages if
3106  * registered as the asic copy callback.
3107  */
3108 int cik_copy_cpdma(struct radeon_device *rdev,
3109                    uint64_t src_offset, uint64_t dst_offset,
3110                    unsigned num_gpu_pages,
3111                    struct radeon_fence **fence)
3112 {
3113         struct radeon_semaphore *sem = NULL;
3114         int ring_index = rdev->asic->copy.blit_ring_index;
3115         struct radeon_ring *ring = &rdev->ring[ring_index];
3116         u32 size_in_bytes, cur_size_in_bytes, control;
3117         int i, num_loops;
3118         int r = 0;
3119
3120         r = radeon_semaphore_create(rdev, &sem);
3121         if (r) {
3122                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3123                 return r;
3124         }
3125
3126         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3127         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3128         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3129         if (r) {
3130                 DRM_ERROR("radeon: moving bo (%d).\n", r);
3131                 radeon_semaphore_free(rdev, &sem, NULL);
3132                 return r;
3133         }
3134
3135         if (radeon_fence_need_sync(*fence, ring->idx)) {
3136                 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3137                                             ring->idx);
3138                 radeon_fence_note_sync(*fence, ring->idx);
3139         } else {
3140                 radeon_semaphore_free(rdev, &sem, NULL);
3141         }
3142
3143         for (i = 0; i < num_loops; i++) {
3144                 cur_size_in_bytes = size_in_bytes;
3145                 if (cur_size_in_bytes > 0x1fffff)
3146                         cur_size_in_bytes = 0x1fffff;
3147                 size_in_bytes -= cur_size_in_bytes;
3148                 control = 0;
3149                 if (size_in_bytes == 0)
3150                         control |= PACKET3_DMA_DATA_CP_SYNC;
3151                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3152                 radeon_ring_write(ring, control);
3153                 radeon_ring_write(ring, lower_32_bits(src_offset));
3154                 radeon_ring_write(ring, upper_32_bits(src_offset));
3155                 radeon_ring_write(ring, lower_32_bits(dst_offset));
3156                 radeon_ring_write(ring, upper_32_bits(dst_offset));
3157                 radeon_ring_write(ring, cur_size_in_bytes);
3158                 src_offset += cur_size_in_bytes;
3159                 dst_offset += cur_size_in_bytes;
3160         }
3161
3162         r = radeon_fence_emit(rdev, fence, ring->idx);
3163         if (r) {
3164                 radeon_ring_unlock_undo(rdev, ring);
3165                 return r;
3166         }
3167
3168         radeon_ring_unlock_commit(rdev, ring);
3169         radeon_semaphore_free(rdev, &sem, *fence);
3170
3171         return r;
3172 }
3173
3174 /*
3175  * IB stuff
3176  */
3177 /**
3178  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3179  *
3180  * @rdev: radeon_device pointer
3181  * @ib: radeon indirect buffer object
3182  *
3183  * Emits an DE (drawing engine) or CE (constant engine) IB
3184  * on the gfx ring.  IBs are usually generated by userspace
3185  * acceleration drivers and submitted to the kernel for
3186  * sheduling on the ring.  This function schedules the IB
3187  * on the gfx ring for execution by the GPU.
3188  */
3189 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3190 {
3191         struct radeon_ring *ring = &rdev->ring[ib->ring];
3192         u32 header, control = INDIRECT_BUFFER_VALID;
3193
3194         if (ib->is_const_ib) {
3195                 /* set switch buffer packet before const IB */
3196                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3197                 radeon_ring_write(ring, 0);
3198
3199                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3200         } else {
3201                 u32 next_rptr;
3202                 if (ring->rptr_save_reg) {
3203                         next_rptr = ring->wptr + 3 + 4;
3204                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3205                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3206                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
3207                         radeon_ring_write(ring, next_rptr);
3208                 } else if (rdev->wb.enabled) {
3209                         next_rptr = ring->wptr + 5 + 4;
3210                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3211                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3212                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3213                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3214                         radeon_ring_write(ring, next_rptr);
3215                 }
3216
3217                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3218         }
3219
3220         control |= ib->length_dw |
3221                 (ib->vm ? (ib->vm->id << 24) : 0);
3222
3223         radeon_ring_write(ring, header);
3224         radeon_ring_write(ring,
3225 #ifdef __BIG_ENDIAN
3226                           (2 << 0) |
3227 #endif
3228                           (ib->gpu_addr & 0xFFFFFFFC));
3229         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3230         radeon_ring_write(ring, control);
3231 }
3232
3233 /**
3234  * cik_ib_test - basic gfx ring IB test
3235  *
3236  * @rdev: radeon_device pointer
3237  * @ring: radeon_ring structure holding ring information
3238  *
3239  * Allocate an IB and execute it on the gfx ring (CIK).
3240  * Provides a basic gfx ring test to verify that IBs are working.
3241  * Returns 0 on success, error on failure.
3242  */
3243 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3244 {
3245         struct radeon_ib ib;
3246         uint32_t scratch;
3247         uint32_t tmp = 0;
3248         unsigned i;
3249         int r;
3250
3251         r = radeon_scratch_get(rdev, &scratch);
3252         if (r) {
3253                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3254                 return r;
3255         }
3256         WREG32(scratch, 0xCAFEDEAD);
3257         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3258         if (r) {
3259                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3260                 radeon_scratch_free(rdev, scratch);
3261                 return r;
3262         }
3263         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3264         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3265         ib.ptr[2] = 0xDEADBEEF;
3266         ib.length_dw = 3;
3267         r = radeon_ib_schedule(rdev, &ib, NULL);
3268         if (r) {
3269                 radeon_scratch_free(rdev, scratch);
3270                 radeon_ib_free(rdev, &ib);
3271                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3272                 return r;
3273         }
3274         r = radeon_fence_wait(ib.fence, false);
3275         if (r) {
3276                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3277                 radeon_scratch_free(rdev, scratch);
3278                 radeon_ib_free(rdev, &ib);
3279                 return r;
3280         }
3281         for (i = 0; i < rdev->usec_timeout; i++) {
3282                 tmp = RREG32(scratch);
3283                 if (tmp == 0xDEADBEEF)
3284                         break;
3285                 DRM_UDELAY(1);
3286         }
3287         if (i < rdev->usec_timeout) {
3288                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3289         } else {
3290                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3291                           scratch, tmp);
3292                 r = -EINVAL;
3293         }
3294         radeon_scratch_free(rdev, scratch);
3295         radeon_ib_free(rdev, &ib);
3296         return r;
3297 }
3298
3299 /*
3300  * CP.
 * On CIK, gfx and compute now have independent command processors.
3302  *
3303  * GFX
3304  * Gfx consists of a single ring and can process both gfx jobs and
3305  * compute jobs.  The gfx CP consists of three microengines (ME):
3306  * PFP - Pre-Fetch Parser
3307  * ME - Micro Engine
3308  * CE - Constant Engine
3309  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3311  * used by the DE so that they can be loaded into cache in parallel
3312  * while the DE is processing state update packets.
3313  *
3314  * Compute
3315  * The compute CP consists of two microengines (ME):
3316  * MEC1 - Compute MicroEngine 1
3317  * MEC2 - Compute MicroEngine 2
3318  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3319  * The queues are exposed to userspace and are programmed directly
3320  * by the compute runtime.
3321  */
3322 /**
3323  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3324  *
3325  * @rdev: radeon_device pointer
3326  * @enable: enable or disable the MEs
3327  *
3328  * Halts or unhalts the gfx MEs.
3329  */
3330 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3331 {
3332         if (enable)
3333                 WREG32(CP_ME_CNTL, 0);
3334         else {
3335                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3336                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3337         }
3338         udelay(50);
3339 }
3340
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	/* all three gfx ucode images must have been fetched beforehand */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	/* halt the MEs before touching their ucode RAM */
	cik_cp_gfx_enable(rdev, false);

	/* PFP: reset write address, then stream in the big-endian ucode words */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE: same pattern as the PFP */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME: uses the RAM_WADDR/RAM_DATA register pair instead */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave every ucode address register zeroed */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
3386
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the default state plus the 17 fixed init dwords
	 * emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default golden register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* two context registers starting at offset 0x316 */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
3447
3448 /**
3449  * cik_cp_gfx_fini - stop the gfx ring
3450  *
3451  * @rdev: radeon_device pointer
3452  *
3453  * Stop the gfx ring and tear down the driver ring
3454  * info.
3455  */
3456 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3457 {
3458         cik_cp_gfx_enable(rdev, false);
3459         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3460 }
3461
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (encoded as log2 of size in dwords / 8) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* this write also drops RB_RPTR_WR_ENA again */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3534
3535 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3536                               struct radeon_ring *ring)
3537 {
3538         u32 rptr;
3539
3540
3541
3542         if (rdev->wb.enabled) {
3543                 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3544         } else {
3545                 mutex_lock(&rdev->srbm_mutex);
3546                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3547                 rptr = RREG32(CP_HQD_PQ_RPTR);
3548                 cik_srbm_select(rdev, 0, 0, 0, 0);
3549                 mutex_unlock(&rdev->srbm_mutex);
3550         }
3551
3552         return rptr;
3553 }
3554
3555 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3556                               struct radeon_ring *ring)
3557 {
3558         u32 wptr;
3559
3560         if (rdev->wb.enabled) {
3561                 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3562         } else {
3563                 mutex_lock(&rdev->srbm_mutex);
3564                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3565                 wptr = RREG32(CP_HQD_PQ_WPTR);
3566                 cik_srbm_select(rdev, 0, 0, 0, 0);
3567                 mutex_unlock(&rdev->srbm_mutex);
3568         }
3569
3570         return wptr;
3571 }
3572
3573 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3574                                struct radeon_ring *ring)
3575 {
3576         rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3577         WDOORBELL32(ring->doorbell_offset, ring->wptr);
3578 }
3579
3580 /**
3581  * cik_cp_compute_enable - enable/disable the compute CP MEs
3582  *
3583  * @rdev: radeon_device pointer
3584  * @enable: enable or disable the MEs
3585  *
3586  * Halts or unhalts the compute MEs.
3587  */
3588 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3589 {
3590         if (enable)
3591                 WREG32(CP_MEC_CNTL, 0);
3592         else
3593                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3594         udelay(50);
3595 }
3596
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* MEC1: reset write address, stream in the big-endian ucode words */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	/* only Kaveri has a second MEC; it runs the same ucode image */
	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
3633
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* unhalting the MECs is all that's needed here; the queues
	 * themselves are programmed in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
3648
3649 /**
3650  * cik_cp_compute_fini - stop the compute queues
3651  *
3652  * @rdev: radeon_device pointer
3653  *
3654  * Stop the compute queues and tear down the driver queue
3655  * info.
3656  */
3657 static void cik_cp_compute_fini(struct radeon_device *rdev)
3658 {
3659         int i, idx, r;
3660
3661         cik_cp_compute_enable(rdev, false);
3662
3663         for (i = 0; i < 2; i++) {
3664                 if (i == 0)
3665                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3666                 else
3667                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3668
3669                 if (rdev->ring[idx].mqd_obj) {
3670                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3671                         if (unlikely(r != 0))
3672                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3673
3674                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3675                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3676
3677                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3678                         rdev->ring[idx].mqd_obj = NULL;
3679                 }
3680         }
3681 }
3682
3683 static void cik_mec_fini(struct radeon_device *rdev)
3684 {
3685         int r;
3686
3687         if (rdev->mec.hpd_eop_obj) {
3688                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3689                 if (unlikely(r != 0))
3690                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3691                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3692                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3693
3694                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3695                 rdev->mec.hpd_eop_obj = NULL;
3696         }
3697 }
3698
3699 #define MEC_HPD_SIZE 2048
3700
/* Allocate, pin, and zero the HPD EOP buffer shared by all MEC pipes.
 * Returns 0 on success or a negative error code; on failure the partial
 * allocation is torn down via cik_mec_fini(). */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* one MEC_HPD_SIZE*2 slot per pipe; allocated once, GTT-backed */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
3756
/* Shadow copy of the CP hardware queue descriptor (HQD) register state
 * kept inside the MQD.  Field order mirrors the CP_HQD_* / CP_MQD_*
 * registers programmed in cik_cp_compute_resume(). */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
3795
/* Memory queue descriptor (MQD) for a Bonaire-family compute queue.
 * Lives in a GTT buffer object and is read by the CP; the layout is
 * presumably fixed by the CP microcode, so do not reorder fields. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state; /* HQD register shadow, see above */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
3823
3824 /**
3825  * cik_cp_compute_resume - setup the compute queue registers
3826  *
3827  * @rdev: radeon_device pointer
3828  *
3829  * Program the compute queues and test them to make sure they
3830  * are working.
3831  * Returns 0 for success, error for failure.
3832  */
3833 static int cik_cp_compute_resume(struct radeon_device *rdev)
3834 {
3835         int r, i, idx;
3836         u32 tmp;
3837         bool use_doorbell = true;
3838         u64 hqd_gpu_addr;
3839         u64 mqd_gpu_addr;
3840         u64 eop_gpu_addr;
3841         u64 wb_gpu_addr;
3842         u32 *buf;
3843         struct bonaire_mqd *mqd;
3844
3845         r = cik_cp_compute_start(rdev);
3846         if (r)
3847                 return r;
3848
3849         /* fix up chicken bits */
3850         tmp = RREG32(CP_CPF_DEBUG);
3851         tmp |= (1 << 23);
3852         WREG32(CP_CPF_DEBUG, tmp);
3853
3854         /* init the pipes */
3855         mutex_lock(&rdev->srbm_mutex);
3856         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3857                 int me = (i < 4) ? 1 : 2;
3858                 int pipe = (i < 4) ? i : (i - 4);
3859
3860                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3861
3862                 cik_srbm_select(rdev, me, pipe, 0, 0);
3863
3864                 /* write the EOP addr */
3865                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3866                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3867
3868                 /* set the VMID assigned */
3869                 WREG32(CP_HPD_EOP_VMID, 0);
3870
3871                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3872                 tmp = RREG32(CP_HPD_EOP_CONTROL);
3873                 tmp &= ~EOP_SIZE_MASK;
3874                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
3875                 WREG32(CP_HPD_EOP_CONTROL, tmp);
3876         }
3877         cik_srbm_select(rdev, 0, 0, 0, 0);
3878         mutex_unlock(&rdev->srbm_mutex);
3879
3880         /* init the queues.  Just two for now. */
3881         for (i = 0; i < 2; i++) {
3882                 if (i == 0)
3883                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
3884                 else
3885                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
3886
3887                 if (rdev->ring[idx].mqd_obj == NULL) {
3888                         r = radeon_bo_create(rdev,
3889                                              sizeof(struct bonaire_mqd),
3890                                              PAGE_SIZE, true,
3891                                              RADEON_GEM_DOMAIN_GTT, NULL,
3892                                              &rdev->ring[idx].mqd_obj);
3893                         if (r) {
3894                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3895                                 return r;
3896                         }
3897                 }
3898
3899                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3900                 if (unlikely(r != 0)) {
3901                         cik_cp_compute_fini(rdev);
3902                         return r;
3903                 }
3904                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3905                                   &mqd_gpu_addr);
3906                 if (r) {
3907                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3908                         cik_cp_compute_fini(rdev);
3909                         return r;
3910                 }
3911                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3912                 if (r) {
3913                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3914                         cik_cp_compute_fini(rdev);
3915                         return r;
3916                 }
3917
3918                 /* doorbell offset */
3919                 rdev->ring[idx].doorbell_offset =
3920                         (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3921
3922                 /* init the mqd struct */
3923                 memset(buf, 0, sizeof(struct bonaire_mqd));
3924
3925                 mqd = (struct bonaire_mqd *)buf;
3926                 mqd->header = 0xC0310800;
3927                 mqd->static_thread_mgmt01[0] = 0xffffffff;
3928                 mqd->static_thread_mgmt01[1] = 0xffffffff;
3929                 mqd->static_thread_mgmt23[0] = 0xffffffff;
3930                 mqd->static_thread_mgmt23[1] = 0xffffffff;
3931
3932                 mutex_lock(&rdev->srbm_mutex);
3933                 cik_srbm_select(rdev, rdev->ring[idx].me,
3934                                 rdev->ring[idx].pipe,
3935                                 rdev->ring[idx].queue, 0);
3936
3937                 /* disable wptr polling */
3938                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3939                 tmp &= ~WPTR_POLL_EN;
3940                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3941
3942                 /* enable doorbell? */
3943                 mqd->queue_state.cp_hqd_pq_doorbell_control =
3944                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3945                 if (use_doorbell)
3946                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3947                 else
3948                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3949                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3950                        mqd->queue_state.cp_hqd_pq_doorbell_control);
3951
3952                 /* disable the queue if it's active */
3953                 mqd->queue_state.cp_hqd_dequeue_request = 0;
3954                 mqd->queue_state.cp_hqd_pq_rptr = 0;
3955                 mqd->queue_state.cp_hqd_pq_wptr= 0;
3956                 if (RREG32(CP_HQD_ACTIVE) & 1) {
3957                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3958                         for (i = 0; i < rdev->usec_timeout; i++) {
3959                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3960                                         break;
3961                                 udelay(1);
3962                         }
3963                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3964                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3965                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3966                 }
3967
3968                 /* set the pointer to the MQD */
3969                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3970                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3971                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3972                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3973                 /* set MQD vmid to 0 */
3974                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3975                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3976                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3977
3978                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3979                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3980                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3981                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3982                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3983                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3984
3985                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3986                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3987                 mqd->queue_state.cp_hqd_pq_control &=
3988                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3989
3990                 mqd->queue_state.cp_hqd_pq_control |=
3991                         order_base_2(rdev->ring[idx].ring_size / 8);
3992                 mqd->queue_state.cp_hqd_pq_control |=
3993                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3994 #ifdef __BIG_ENDIAN
3995                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3996 #endif
3997                 mqd->queue_state.cp_hqd_pq_control &=
3998                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3999                 mqd->queue_state.cp_hqd_pq_control |=
4000                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4001                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4002
4003                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4004                 if (i == 0)
4005                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4006                 else
4007                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4008                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4009                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4010                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4011                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4012                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4013
4014                 /* set the wb address wether it's enabled or not */
4015                 if (i == 0)
4016                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4017                 else
4018                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4019                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4020                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4021                         upper_32_bits(wb_gpu_addr) & 0xffff;
4022                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4023                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4024                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4025                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4026
4027                 /* enable the doorbell if requested */
4028                 if (use_doorbell) {
4029                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4030                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4031                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4032                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4033                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
4034                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4035                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4036                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4037
4038                 } else {
4039                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4040                 }
4041                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4042                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4043
4044                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4045                 rdev->ring[idx].wptr = 0;
4046                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4047                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4048                 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4049                 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4050
4051                 /* set the vmid for the queue */
4052                 mqd->queue_state.cp_hqd_vmid = 0;
4053                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4054
4055                 /* activate the queue */
4056                 mqd->queue_state.cp_hqd_active = 1;
4057                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4058
4059                 cik_srbm_select(rdev, 0, 0, 0, 0);
4060                 mutex_unlock(&rdev->srbm_mutex);
4061
4062                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4063                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4064
4065                 rdev->ring[idx].ready = true;
4066                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4067                 if (r)
4068                         rdev->ring[idx].ready = false;
4069         }
4070
4071         return 0;
4072 }
4073
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4079
/* Load the gfx CP ucode, then the compute CP ucode.
 * Returns 0 on success or the first error encountered. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4093
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4099
/* Bring both command processors back up: load ucode, then resume the gfx
 * ring and the compute queues.  The GUI idle interrupt is kept off for
 * the duration of the sequence.  Returns 0 on success, error otherwise. */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4121
/* Dump the GRBM/SRBM, SDMA, and CP status registers to the kernel log
 * to aid debugging of GPU hangs and soft resets. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share one register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4161
4162 /**
4163  * cik_gpu_check_soft_reset - check which blocks are busy
4164  *
4165  * @rdev: radeon_device pointer
4166  *
4167  * Check which blocks are busy and return the relevant reset
4168  * mask to be used by cik_gpu_soft_reset().
4169  * Returns a mask of the blocks to be reset.
4170  */
4171 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4172 {
4173         u32 reset_mask = 0;
4174         u32 tmp;
4175
4176         /* GRBM_STATUS */
4177         tmp = RREG32(GRBM_STATUS);
4178         if (tmp & (PA_BUSY | SC_BUSY |
4179                    BCI_BUSY | SX_BUSY |
4180                    TA_BUSY | VGT_BUSY |
4181                    DB_BUSY | CB_BUSY |
4182                    GDS_BUSY | SPI_BUSY |
4183                    IA_BUSY | IA_BUSY_NO_DMA))
4184                 reset_mask |= RADEON_RESET_GFX;
4185
4186         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4187                 reset_mask |= RADEON_RESET_CP;
4188
4189         /* GRBM_STATUS2 */
4190         tmp = RREG32(GRBM_STATUS2);
4191         if (tmp & RLC_BUSY)
4192                 reset_mask |= RADEON_RESET_RLC;
4193
4194         /* SDMA0_STATUS_REG */
4195         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4196         if (!(tmp & SDMA_IDLE))
4197                 reset_mask |= RADEON_RESET_DMA;
4198
4199         /* SDMA1_STATUS_REG */
4200         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4201         if (!(tmp & SDMA_IDLE))
4202                 reset_mask |= RADEON_RESET_DMA1;
4203
4204         /* SRBM_STATUS2 */
4205         tmp = RREG32(SRBM_STATUS2);
4206         if (tmp & SDMA_BUSY)
4207                 reset_mask |= RADEON_RESET_DMA;
4208
4209         if (tmp & SDMA1_BUSY)
4210                 reset_mask |= RADEON_RESET_DMA1;
4211
4212         /* SRBM_STATUS */
4213         tmp = RREG32(SRBM_STATUS);
4214
4215         if (tmp & IH_BUSY)
4216                 reset_mask |= RADEON_RESET_IH;
4217
4218         if (tmp & SEM_BUSY)
4219                 reset_mask |= RADEON_RESET_SEM;
4220
4221         if (tmp & GRBM_RQ_PENDING)
4222                 reset_mask |= RADEON_RESET_GRBM;
4223
4224         if (tmp & VMC_BUSY)
4225                 reset_mask |= RADEON_RESET_VMC;
4226
4227         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4228                    MCC_BUSY | MCD_BUSY))
4229                 reset_mask |= RADEON_RESET_MC;
4230
4231         if (evergreen_is_display_hung(rdev))
4232                 reset_mask |= RADEON_RESET_DISPLAY;
4233
4234         /* Skip MC reset as it's mostly likely not hung, just busy */
4235         if (reset_mask & RADEON_RESET_MC) {
4236                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4237                 reset_mask &= ~RADEON_RESET_MC;
4238         }
4239
4240         return reset_mask;
4241 }
4242
4243 /**
4244  * cik_gpu_soft_reset - soft reset GPU
4245  *
4246  * @rdev: radeon_device pointer
4247  * @reset_mask: mask of which blocks to reset
4248  *
4249  * Soft reset the blocks specified in @reset_mask.
4250  */
4251 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4252 {
4253         struct evergreen_mc_save save;
4254         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4255         u32 tmp;
4256
4257         if (reset_mask == 0)
4258                 return;
4259
4260         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4261
4262         cik_print_gpu_status_regs(rdev);
4263         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4264                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4265         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4266                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4267
4268         /* disable CG/PG */
4269         cik_fini_pg(rdev);
4270         cik_fini_cg(rdev);
4271
4272         /* stop the rlc */
4273         cik_rlc_stop(rdev);
4274
4275         /* Disable GFX parsing/prefetching */
4276         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4277
4278         /* Disable MEC parsing/prefetching */
4279         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4280
4281         if (reset_mask & RADEON_RESET_DMA) {
4282                 /* sdma0 */
4283                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4284                 tmp |= SDMA_HALT;
4285                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4286         }
4287         if (reset_mask & RADEON_RESET_DMA1) {
4288                 /* sdma1 */
4289                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4290                 tmp |= SDMA_HALT;
4291                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4292         }
4293
4294         evergreen_mc_stop(rdev, &save);
4295         if (evergreen_mc_wait_for_idle(rdev)) {
4296                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4297         }
4298
4299         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4300                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4301
4302         if (reset_mask & RADEON_RESET_CP) {
4303                 grbm_soft_reset |= SOFT_RESET_CP;
4304
4305                 srbm_soft_reset |= SOFT_RESET_GRBM;
4306         }
4307
4308         if (reset_mask & RADEON_RESET_DMA)
4309                 srbm_soft_reset |= SOFT_RESET_SDMA;
4310
4311         if (reset_mask & RADEON_RESET_DMA1)
4312                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4313
4314         if (reset_mask & RADEON_RESET_DISPLAY)
4315                 srbm_soft_reset |= SOFT_RESET_DC;
4316
4317         if (reset_mask & RADEON_RESET_RLC)
4318                 grbm_soft_reset |= SOFT_RESET_RLC;
4319
4320         if (reset_mask & RADEON_RESET_SEM)
4321                 srbm_soft_reset |= SOFT_RESET_SEM;
4322
4323         if (reset_mask & RADEON_RESET_IH)
4324                 srbm_soft_reset |= SOFT_RESET_IH;
4325
4326         if (reset_mask & RADEON_RESET_GRBM)
4327                 srbm_soft_reset |= SOFT_RESET_GRBM;
4328
4329         if (reset_mask & RADEON_RESET_VMC)
4330                 srbm_soft_reset |= SOFT_RESET_VMC;
4331
4332         if (!(rdev->flags & RADEON_IS_IGP)) {
4333                 if (reset_mask & RADEON_RESET_MC)
4334                         srbm_soft_reset |= SOFT_RESET_MC;
4335         }
4336
4337         if (grbm_soft_reset) {
4338                 tmp = RREG32(GRBM_SOFT_RESET);
4339                 tmp |= grbm_soft_reset;
4340                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4341                 WREG32(GRBM_SOFT_RESET, tmp);
4342                 tmp = RREG32(GRBM_SOFT_RESET);
4343
4344                 udelay(50);
4345
4346                 tmp &= ~grbm_soft_reset;
4347                 WREG32(GRBM_SOFT_RESET, tmp);
4348                 tmp = RREG32(GRBM_SOFT_RESET);
4349         }
4350
4351         if (srbm_soft_reset) {
4352                 tmp = RREG32(SRBM_SOFT_RESET);
4353                 tmp |= srbm_soft_reset;
4354                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4355                 WREG32(SRBM_SOFT_RESET, tmp);
4356                 tmp = RREG32(SRBM_SOFT_RESET);
4357
4358                 udelay(50);
4359
4360                 tmp &= ~srbm_soft_reset;
4361                 WREG32(SRBM_SOFT_RESET, tmp);
4362                 tmp = RREG32(SRBM_SOFT_RESET);
4363         }
4364
4365         /* Wait a little for things to settle down */
4366         udelay(50);
4367
4368         evergreen_mc_resume(rdev, &save);
4369         udelay(50);
4370
4371         cik_print_gpu_status_regs(rdev);
4372 }
4373
4374 /**
4375  * cik_asic_reset - soft reset GPU
4376  *
4377  * @rdev: radeon_device pointer
4378  *
4379  * Look up which blocks are hung and attempt
4380  * to reset them.
4381  * Returns 0 for success.
4382  */
4383 int cik_asic_reset(struct radeon_device *rdev)
4384 {
4385         u32 reset_mask;
4386
4387         reset_mask = cik_gpu_check_soft_reset(rdev);
4388
4389         if (reset_mask)
4390                 r600_set_bios_scratch_engine_hung(rdev, true);
4391
4392         cik_gpu_soft_reset(rdev, reset_mask);
4393
4394         reset_mask = cik_gpu_check_soft_reset(rdev);
4395
4396         if (!reset_mask)
4397                 r600_set_bios_scratch_engine_hung(rdev, false);
4398
4399         return 0;
4400 }
4401
4402 /**
4403  * cik_gfx_is_lockup - check if the 3D engine is locked up
4404  *
4405  * @rdev: radeon_device pointer
4406  * @ring: radeon_ring structure holding ring information
4407  *
4408  * Check if the 3D engine is locked up (CIK).
4409  * Returns true if the engine is locked, false if not.
4410  */
4411 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4412 {
4413         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4414
4415         if (!(reset_mask & (RADEON_RESET_GFX |
4416                             RADEON_RESET_COMPUTE |
4417                             RADEON_RESET_CP))) {
4418                 radeon_ring_lockup_update(ring);
4419                 return false;
4420         }
4421         /* force CP activities */
4422         radeon_ring_force_activity(rdev, ring);
4423         return radeon_ring_test_lockup(rdev, ring);
4424 }
4425
4426 /* MC */
4427 /**
4428  * cik_mc_program - program the GPU memory controller
4429  *
4430  * @rdev: radeon_device pointer
4431  *
4432  * Set the location of vram, gart, and AGP in the GPU's
4433  * physical address space (CIK).
4434  */
4435 static void cik_mc_program(struct radeon_device *rdev)
4436 {
4437         struct evergreen_mc_save save;
4438         u32 tmp;
4439         int i, j;
4440
4441         /* Initialize HDP */
4442         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4443                 WREG32((0x2c14 + j), 0x00000000);
4444                 WREG32((0x2c18 + j), 0x00000000);
4445                 WREG32((0x2c1c + j), 0x00000000);
4446                 WREG32((0x2c20 + j), 0x00000000);
4447                 WREG32((0x2c24 + j), 0x00000000);
4448         }
4449         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4450
4451         evergreen_mc_stop(rdev, &save);
4452         if (radeon_mc_wait_for_idle(rdev)) {
4453                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4454         }
4455         /* Lockout access through VGA aperture*/
4456         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4457         /* Update configuration */
4458         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4459                rdev->mc.vram_start >> 12);
4460         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4461                rdev->mc.vram_end >> 12);
4462         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4463                rdev->vram_scratch.gpu_addr >> 12);
4464         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4465         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4466         WREG32(MC_VM_FB_LOCATION, tmp);
4467         /* XXX double check these! */
4468         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4469         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4470         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4471         WREG32(MC_VM_AGP_BASE, 0);
4472         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4473         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4474         if (radeon_mc_wait_for_idle(rdev)) {
4475                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4476         }
4477         evergreen_mc_resume(rdev, &save);
4478         /* we need to own VRAM, so turn off the VGA renderer here
4479          * to stop it overwriting our objects */
4480         rv515_vga_render_disable(rdev);
4481 }
4482
4483 /**
4484  * cik_mc_init - initialize the memory controller driver params
4485  *
4486  * @rdev: radeon_device pointer
4487  *
4488  * Look up the amount of vram, vram width, and decide how to place
4489  * vram and gart within the GPU's physical address space (CIK).
4490  * Returns 0 for success.
4491  */
4492 static int cik_mc_init(struct radeon_device *rdev)
4493 {
4494         u32 tmp;
4495         int chansize, numchan;
4496
4497         /* Get VRAM informations */
4498         rdev->mc.vram_is_ddr = true;
4499         tmp = RREG32(MC_ARB_RAMCFG);
4500         if (tmp & CHANSIZE_MASK) {
4501                 chansize = 64;
4502         } else {
4503                 chansize = 32;
4504         }
4505         tmp = RREG32(MC_SHARED_CHMAP);
4506         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4507         case 0:
4508         default:
4509                 numchan = 1;
4510                 break;
4511         case 1:
4512                 numchan = 2;
4513                 break;
4514         case 2:
4515                 numchan = 4;
4516                 break;
4517         case 3:
4518                 numchan = 8;
4519                 break;
4520         case 4:
4521                 numchan = 3;
4522                 break;
4523         case 5:
4524                 numchan = 6;
4525                 break;
4526         case 6:
4527                 numchan = 10;
4528                 break;
4529         case 7:
4530                 numchan = 12;
4531                 break;
4532         case 8:
4533                 numchan = 16;
4534                 break;
4535         }
4536         rdev->mc.vram_width = numchan * chansize;
4537         /* Could aper size report 0 ? */
4538         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4539         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4540         /* size in MB on si */
4541         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4542         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
4543         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4544         si_vram_gtt_location(rdev, &rdev->mc);
4545         radeon_update_bandwidth_info(rdev);
4546
4547         return 0;
4548 }
4549
4550 /*
4551  * GART
4552  * VMID 0 is the physical GPU addresses as used by the kernel.
4553  * VMIDs 1-15 are used for userspace clients and are handled
4554  * by the radeon vm/hsa code.
4555  */
4556 /**
4557  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4558  *
4559  * @rdev: radeon_device pointer
4560  *
4561  * Flush the TLB for the VMID 0 page table (CIK).
4562  */
4563 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4564 {
4565         /* flush hdp cache */
4566         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4567
4568         /* bits 0-15 are the VM contexts0-15 */
4569         WREG32(VM_INVALIDATE_REQUEST, 0x1);
4570 }
4571
4572 /**
4573  * cik_pcie_gart_enable - gart enable
4574  *
4575  * @rdev: radeon_device pointer
4576  *
4577  * This sets up the TLBs, programs the page tables for VMID0,
4578  * sets up the hw for VMIDs 1-15 which are allocated on
4579  * demand, and sets up the global locations for the LDS, GDS,
4580  * and GPUVM for FSA64 clients (CIK).
4581  * Returns 0 for success, errors for failure.
4582  */
4583 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4584 {
4585         int r, i;
4586
4587         if (rdev->gart.robj == NULL) {
4588                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4589                 return -EINVAL;
4590         }
4591         r = radeon_gart_table_vram_pin(rdev);
4592         if (r)
4593                 return r;
4594         radeon_gart_restore(rdev);
4595         /* Setup TLB control */
4596         WREG32(MC_VM_MX_L1_TLB_CNTL,
4597                (0xA << 7) |
4598                ENABLE_L1_TLB |
4599                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4600                ENABLE_ADVANCED_DRIVER_MODEL |
4601                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4602         /* Setup L2 cache */
4603         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4604                ENABLE_L2_FRAGMENT_PROCESSING |
4605                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4606                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4607                EFFECTIVE_L2_QUEUE_SIZE(7) |
4608                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4609         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4610         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4611                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4612         /* setup context0 */
4613         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4614         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4615         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4616         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4617                         (u32)(rdev->dummy_page.addr >> 12));
4618         WREG32(VM_CONTEXT0_CNTL2, 0);
4619         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4620                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4621
4622         WREG32(0x15D4, 0);
4623         WREG32(0x15D8, 0);
4624         WREG32(0x15DC, 0);
4625
4626         /* empty context1-15 */
4627         /* FIXME start with 4G, once using 2 level pt switch to full
4628          * vm size space
4629          */
4630         /* set vm size, must be a multiple of 4 */
4631         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4632         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4633         for (i = 1; i < 16; i++) {
4634                 if (i < 8)
4635                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4636                                rdev->gart.table_addr >> 12);
4637                 else
4638                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4639                                rdev->gart.table_addr >> 12);
4640         }
4641
4642         /* enable context1-15 */
4643         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4644                (u32)(rdev->dummy_page.addr >> 12));
4645         WREG32(VM_CONTEXT1_CNTL2, 4);
4646         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4647                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4648                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4649                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4650                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4651                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4652                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4653                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4654                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4655                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4656                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4657                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4658                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4659
4660         /* TC cache setup ??? */
4661         WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4662         WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4663         WREG32(TC_CFG_L1_STORE_POLICY, 0);
4664
4665         WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4666         WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4667         WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4668         WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4669         WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4670
4671         WREG32(TC_CFG_L1_VOLATILE, 0);
4672         WREG32(TC_CFG_L2_VOLATILE, 0);
4673
4674         if (rdev->family == CHIP_KAVERI) {
4675                 u32 tmp = RREG32(CHUB_CONTROL);
4676                 tmp &= ~BYPASS_VM;
4677                 WREG32(CHUB_CONTROL, tmp);
4678         }
4679
4680         /* XXX SH_MEM regs */
4681         /* where to put LDS, scratch, GPUVM in FSA64 space */
4682         mutex_lock(&rdev->srbm_mutex);
4683         for (i = 0; i < 16; i++) {
4684                 cik_srbm_select(rdev, 0, 0, 0, i);
4685                 /* CP and shaders */
4686                 WREG32(SH_MEM_CONFIG, 0);
4687                 WREG32(SH_MEM_APE1_BASE, 1);
4688                 WREG32(SH_MEM_APE1_LIMIT, 0);
4689                 WREG32(SH_MEM_BASES, 0);
4690                 /* SDMA GFX */
4691                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4692                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4693                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4694                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4695                 /* XXX SDMA RLC - todo */
4696         }
4697         cik_srbm_select(rdev, 0, 0, 0, 0);
4698         mutex_unlock(&rdev->srbm_mutex);
4699
4700         cik_pcie_gart_tlb_flush(rdev);
4701         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4702                  (unsigned)(rdev->mc.gtt_size >> 20),
4703                  (unsigned long long)rdev->gart.table_addr);
4704         rdev->gart.ready = true;
4705         return 0;
4706 }
4707
4708 /**
4709  * cik_pcie_gart_disable - gart disable
4710  *
4711  * @rdev: radeon_device pointer
4712  *
4713  * This disables all VM page table (CIK).
4714  */
4715 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4716 {
4717         /* Disable all tables */
4718         WREG32(VM_CONTEXT0_CNTL, 0);
4719         WREG32(VM_CONTEXT1_CNTL, 0);
4720         /* Setup TLB control */
4721         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4722                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4723         /* Setup L2 cache */
4724         WREG32(VM_L2_CNTL,
4725                ENABLE_L2_FRAGMENT_PROCESSING |
4726                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4727                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4728                EFFECTIVE_L2_QUEUE_SIZE(7) |
4729                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4730         WREG32(VM_L2_CNTL2, 0);
4731         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4732                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4733         radeon_gart_table_vram_unpin(rdev);
4734 }
4735
4736 /**
4737  * cik_pcie_gart_fini - vm fini callback
4738  *
4739  * @rdev: radeon_device pointer
4740  *
4741  * Tears down the driver GART/VM setup (CIK).
4742  */
4743 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4744 {
4745         cik_pcie_gart_disable(rdev);
4746         radeon_gart_table_vram_free(rdev);
4747         radeon_gart_fini(rdev);
4748 }
4749
4750 /* vm parser */
4751 /**
4752  * cik_ib_parse - vm ib_parse callback
4753  *
4754  * @rdev: radeon_device pointer
4755  * @ib: indirect buffer pointer
4756  *
4757  * CIK uses hw IB checking so this is a nop (CIK).
4758  */
4759 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4760 {
4761         return 0;
4762 }
4763
4764 /*
4765  * vm
4766  * VMID 0 is the physical GPU addresses as used by the kernel.
4767  * VMIDs 1-15 are used for userspace clients and are handled
4768  * by the radeon vm/hsa code.
4769  */
4770 /**
4771  * cik_vm_init - cik vm init callback
4772  *
4773  * @rdev: radeon_device pointer
4774  *
4775  * Inits cik specific vm parameters (number of VMs, base of vram for
4776  * VMIDs 1-15) (CIK).
4777  * Returns 0 for success.
4778  */
4779 int cik_vm_init(struct radeon_device *rdev)
4780 {
4781         /* number of VMs */
4782         rdev->vm_manager.nvm = 16;
4783         /* base offset of vram pages */
4784         if (rdev->flags & RADEON_IS_IGP) {
4785                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4786                 tmp <<= 22;
4787                 rdev->vm_manager.vram_base_offset = tmp;
4788         } else
4789                 rdev->vm_manager.vram_base_offset = 0;
4790
4791         return 0;
4792 }
4793
4794 /**
4795  * cik_vm_fini - cik vm fini callback
4796  *
4797  * @rdev: radeon_device pointer
4798  *
4799  * Tear down any asic specific VM setup (CIK).
4800  */
4801 void cik_vm_fini(struct radeon_device *rdev)
4802 {
4803 }
4804
4805 /**
4806  * cik_vm_decode_fault - print human readable fault info
4807  *
4808  * @rdev: radeon_device pointer
4809  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4810  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4811  *
4812  * Print human readable fault information (CIK).
4813  */
4814 static void cik_vm_decode_fault(struct radeon_device *rdev,
4815                                 u32 status, u32 addr, u32 mc_client)
4816 {
4817         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4818         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4819         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4820         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
4821                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
4822
4823         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
4824                protections, vmid, addr,
4825                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4826                block, mc_client, mc_id);
4827 }
4828
4829 /**
4830  * cik_vm_flush - cik vm flush using the CP
4831  *
4832  * @rdev: radeon_device pointer
4833  *
4834  * Update the page table base and flush the VM TLB
4835  * using the CP (CIK).
4836  */
4837 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4838 {
4839         struct radeon_ring *ring = &rdev->ring[ridx];
4840
4841         if (vm == NULL)
4842                 return;
4843
4844         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4845         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4846                                  WRITE_DATA_DST_SEL(0)));
4847         if (vm->id < 8) {
4848                 radeon_ring_write(ring,
4849                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4850         } else {
4851                 radeon_ring_write(ring,
4852                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4853         }
4854         radeon_ring_write(ring, 0);
4855         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4856
4857         /* update SH_MEM_* regs */
4858         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4859         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4860                                  WRITE_DATA_DST_SEL(0)));
4861         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4862         radeon_ring_write(ring, 0);
4863         radeon_ring_write(ring, VMID(vm->id));
4864
4865         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4866         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4867                                  WRITE_DATA_DST_SEL(0)));
4868         radeon_ring_write(ring, SH_MEM_BASES >> 2);
4869         radeon_ring_write(ring, 0);
4870
4871         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4872         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4873         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4874         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4875
4876         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4877         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4878                                  WRITE_DATA_DST_SEL(0)));
4879         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4880         radeon_ring_write(ring, 0);
4881         radeon_ring_write(ring, VMID(0));
4882
4883         /* HDP flush */
4884         /* We should be using the WAIT_REG_MEM packet here like in
4885          * cik_fence_ring_emit(), but it causes the CP to hang in this
4886          * context...
4887          */
4888         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4889         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4890                                  WRITE_DATA_DST_SEL(0)));
4891         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4892         radeon_ring_write(ring, 0);
4893         radeon_ring_write(ring, 0);
4894
4895         /* bits 0-15 are the VM contexts0-15 */
4896         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4897         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4898                                  WRITE_DATA_DST_SEL(0)));
4899         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4900         radeon_ring_write(ring, 0);
4901         radeon_ring_write(ring, 1 << vm->id);
4902
4903         /* compute doesn't have PFP */
4904         if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4905                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4906                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4907                 radeon_ring_write(ring, 0x0);
4908         }
4909 }
4910
4911 /*
4912  * RLC
4913  * The RLC is a multi-purpose microengine that handles a
4914  * variety of functions, the most important of which is
4915  * the interrupt controller.
4916  */
4917 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4918                                           bool enable)
4919 {
4920         u32 tmp = RREG32(CP_INT_CNTL_RING0);
4921
4922         if (enable)
4923                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4924         else
4925                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4926         WREG32(CP_INT_CNTL_RING0, tmp);
4927 }
4928
4929 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4930 {
4931         u32 tmp;
4932
4933         tmp = RREG32(RLC_LB_CNTL);
4934         if (enable)
4935                 tmp |= LOAD_BALANCE_ENABLE;
4936         else
4937                 tmp &= ~LOAD_BALANCE_ENABLE;
4938         WREG32(RLC_LB_CNTL, tmp);
4939 }
4940
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH combination, then
 * polls the non-CU masters (SE/GC/TC0/TC1) in
 * RLC_SERDES_NONCU_MASTER_BUSY.  Each poll gives up after
 * rdev->usec_timeout iterations (1us apart) without reporting an error.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
4965
4966 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4967 {
4968         u32 tmp;
4969
4970         tmp = RREG32(RLC_CNTL);
4971         if (tmp != rlc)
4972                 WREG32(RLC_CNTL, rlc);
4973 }
4974
4975 static u32 cik_halt_rlc(struct radeon_device *rdev)
4976 {
4977         u32 data, orig;
4978
4979         orig = data = RREG32(RLC_CNTL);
4980
4981         if (data & RLC_ENABLE) {
4982                 u32 i;
4983
4984                 data &= ~RLC_ENABLE;
4985                 WREG32(RLC_CNTL, data);
4986
4987                 for (i = 0; i < rdev->usec_timeout; i++) {
4988                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4989                                 break;
4990                         udelay(1);
4991                 }
4992
4993                 cik_wait_for_rlc_serdes(rdev);
4994         }
4995
4996         return orig;
4997 }
4998
4999 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5000 {
5001         u32 tmp, i, mask;
5002
5003         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5004         WREG32(RLC_GPR_REG2, tmp);
5005
5006         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5007         for (i = 0; i < rdev->usec_timeout; i++) {
5008                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5009                         break;
5010                 udelay(1);
5011         }
5012
5013         for (i = 0; i < rdev->usec_timeout; i++) {
5014                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5015                         break;
5016                 udelay(1);
5017         }
5018 }
5019
5020 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5021 {
5022         u32 tmp;
5023
5024         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5025         WREG32(RLC_GPR_REG2, tmp);
5026 }
5027
5028 /**
5029  * cik_rlc_stop - stop the RLC ME
5030  *
5031  * @rdev: radeon_device pointer
5032  *
5033  * Halt the RLC ME (MicroEngine) (CIK).
5034  */
5035 static void cik_rlc_stop(struct radeon_device *rdev)
5036 {
5037         WREG32(RLC_CNTL, 0);
5038
5039         cik_enable_gui_idle_interrupt(rdev, false);
5040
5041         cik_wait_for_rlc_serdes(rdev);
5042 }
5043
5044 /**
5045  * cik_rlc_start - start the RLC ME
5046  *
5047  * @rdev: radeon_device pointer
5048  *
5049  * Unhalt the RLC ME (MicroEngine) (CIK).
5050  */
5051 static void cik_rlc_start(struct radeon_device *rdev)
5052 {
5053         WREG32(RLC_CNTL, RLC_ENABLE);
5054
5055         cik_enable_gui_idle_interrupt(rdev, true);
5056
5057         udelay(50);
5058 }
5059
5060 /**
5061  * cik_rlc_resume - setup the RLC hw
5062  *
5063  * @rdev: radeon_device pointer
5064  *
5065  * Initialize the RLC registers, load the ucode,
5066  * and start the RLC (CIK).
5067  * Returns 0 for success, -EINVAL if the ucode is not available.
5068  */
5069 static int cik_rlc_resume(struct radeon_device *rdev)
5070 {
5071         u32 i, size, tmp;
5072         const __be32 *fw_data;
5073
5074         if (!rdev->rlc_fw)
5075                 return -EINVAL;
5076
5077         switch (rdev->family) {
5078         case CHIP_BONAIRE:
5079         default:
5080                 size = BONAIRE_RLC_UCODE_SIZE;
5081                 break;
5082         case CHIP_KAVERI:
5083                 size = KV_RLC_UCODE_SIZE;
5084                 break;
5085         case CHIP_KABINI:
5086                 size = KB_RLC_UCODE_SIZE;
5087                 break;
5088         }
5089
5090         cik_rlc_stop(rdev);
5091
5092         /* disable CG */
5093         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5094         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5095
5096         si_rlc_reset(rdev);
5097
5098         cik_init_pg(rdev);
5099
5100         cik_init_cg(rdev);
5101
5102         WREG32(RLC_LB_CNTR_INIT, 0);
5103         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5104
5105         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5106         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5107         WREG32(RLC_LB_PARAMS, 0x00600408);
5108         WREG32(RLC_LB_CNTL, 0x80000004);
5109
5110         WREG32(RLC_MC_CNTL, 0);
5111         WREG32(RLC_UCODE_CNTL, 0);
5112
5113         fw_data = (const __be32 *)rdev->rlc_fw->data;
5114                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5115         for (i = 0; i < size; i++)
5116                 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5117         WREG32(RLC_GPM_UCODE_ADDR, 0);
5118
5119         /* XXX - find out what chips support lbpw */
5120         cik_enable_lbpw(rdev, false);
5121
5122         if (rdev->family == CHIP_BONAIRE)
5123                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5124
5125         cik_rlc_start(rdev);
5126
5127         return 0;
5128 }
5129
/* Enable/disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the gfx block. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes; tmp holds the
		 * previous RLC_CNTL for the restore below */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the CGCG override settings to every SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): repeated reads look like a settle/flush
		 * delay before disabling gating — confirm against hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write the register if something actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5165
/* Enable/disable medium grain clock gating (MGCG) for the gfx block,
 * together with the related memory light sleep and CGTS options. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep needs both MGLS and CP_LS support */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override bit (bit 1) to allow gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes; tmp keeps the
		 * previous RLC_CNTL for the restore below */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the MGCG override to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		/* optionally configure CGTS (texture clock gating) */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit to block gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* push the disable through the serdes as above */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
5244
/* MC/VM hub registers that share common clock-gating control bits;
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() iterate over this table. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5257
5258 static void cik_enable_mc_ls(struct radeon_device *rdev,
5259                              bool enable)
5260 {
5261         int i;
5262         u32 orig, data;
5263
5264         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5265                 orig = data = RREG32(mc_cg_registers[i]);
5266                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5267                         data |= MC_LS_ENABLE;
5268                 else
5269                         data &= ~MC_LS_ENABLE;
5270                 if (data != orig)
5271                         WREG32(mc_cg_registers[i], data);
5272         }
5273 }
5274
5275 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5276                                bool enable)
5277 {
5278         int i;
5279         u32 orig, data;
5280
5281         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5282                 orig = data = RREG32(mc_cg_registers[i]);
5283                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5284                         data |= MC_CG_ENABLE;
5285                 else
5286                         data &= ~MC_CG_ENABLE;
5287                 if (data != orig)
5288                         WREG32(mc_cg_registers[i], data);
5289         }
5290 }
5291
5292 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5293                                  bool enable)
5294 {
5295         u32 orig, data;
5296
5297         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5298                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5299                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5300         } else {
5301                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5302                 data |= 0xff000000;
5303                 if (data != orig)
5304                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5305
5306                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5307                 data |= 0xff000000;
5308                 if (data != orig)
5309                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5310         }
5311 }
5312
5313 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5314                                  bool enable)
5315 {
5316         u32 orig, data;
5317
5318         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5319                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5320                 data |= 0x100;
5321                 if (orig != data)
5322                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5323
5324                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5325                 data |= 0x100;
5326                 if (orig != data)
5327                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5328         } else {
5329                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5330                 data &= ~0x100;
5331                 if (orig != data)
5332                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5333
5334                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5335                 data &= ~0x100;
5336                 if (orig != data)
5337                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5338         }
5339 }
5340
5341 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5342                                 bool enable)
5343 {
5344         u32 orig, data;
5345
5346         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5347                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5348                 data = 0xfff;
5349                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5350
5351                 orig = data = RREG32(UVD_CGC_CTRL);
5352                 data |= DCM;
5353                 if (orig != data)
5354                         WREG32(UVD_CGC_CTRL, data);
5355         } else {
5356                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5357                 data &= ~0xfff;
5358                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5359
5360                 orig = data = RREG32(UVD_CGC_CTRL);
5361                 data &= ~DCM;
5362                 if (orig != data)
5363                         WREG32(UVD_CGC_CTRL, data);
5364         }
5365 }
5366
5367 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5368                                bool enable)
5369 {
5370         u32 orig, data;
5371
5372         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5373
5374         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5375                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5376                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5377         else
5378                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5379                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5380
5381         if (orig != data)
5382                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
5383 }
5384
5385 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5386                                 bool enable)
5387 {
5388         u32 orig, data;
5389
5390         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5391
5392         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5393                 data &= ~CLOCK_GATING_DIS;
5394         else
5395                 data |= CLOCK_GATING_DIS;
5396
5397         if (orig != data)
5398                 WREG32(HDP_HOST_PATH_CNTL, data);
5399 }
5400
5401 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5402                               bool enable)
5403 {
5404         u32 orig, data;
5405
5406         orig = data = RREG32(HDP_MEM_POWER_LS);
5407
5408         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5409                 data |= HDP_LS_ENABLE;
5410         else
5411                 data &= ~HDP_LS_ENABLE;
5412
5413         if (orig != data)
5414                 WREG32(HDP_MEM_POWER_LS, data);
5415 }
5416
/* Enable or disable clock gating for the blocks selected in @block.
 * @block is a mask of RADEON_CG_BLOCK_* values. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* quiesce the gui-idle interrupt while reprogramming */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG on enable ... */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* ... and the reverse order on disable */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* skip UVD gating on parts without a UVD block */
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}
5460
/* Enable clock gating on all supported blocks: gfx first, then UVD
 * internal CG, then the remaining blocks in one pass. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
5475
/* Disable clock gating everywhere; the non-gfx blocks first, gfx last
 * (the reverse of cik_init_cg()). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
5486
5487 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5488                                           bool enable)
5489 {
5490         u32 data, orig;
5491
5492         orig = data = RREG32(RLC_PG_CNTL);
5493         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5494                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5495         else
5496                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5497         if (orig != data)
5498                 WREG32(RLC_PG_CNTL, data);
5499 }
5500
5501 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5502                                           bool enable)
5503 {
5504         u32 data, orig;
5505
5506         orig = data = RREG32(RLC_PG_CNTL);
5507         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5508                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5509         else
5510                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5511         if (orig != data)
5512                 WREG32(RLC_PG_CNTL, data);
5513 }
5514
5515 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5516 {
5517         u32 data, orig;
5518
5519         orig = data = RREG32(RLC_PG_CNTL);
5520         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5521                 data &= ~DISABLE_CP_PG;
5522         else
5523                 data |= DISABLE_CP_PG;
5524         if (orig != data)
5525                 WREG32(RLC_PG_CNTL, data);
5526 }
5527
5528 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5529 {
5530         u32 data, orig;
5531
5532         orig = data = RREG32(RLC_PG_CNTL);
5533         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5534                 data &= ~DISABLE_GDS_PG;
5535         else
5536                 data |= DISABLE_GDS_PG;
5537         if (orig != data)
5538                 WREG32(RLC_PG_CNTL, data);
5539 }
5540
#define CP_ME_TABLE_SIZE    96	/* table length in dwords */
#define CP_ME_TABLE_OFFSET  2048	/* dword offset in CE/PFP/ME fw images */
#define CP_MEC_TABLE_OFFSET 4096	/* dword offset in MEC fw images */

/* Copy the CP powergating register tables embedded in the CE, PFP, ME
 * and MEC microcode images into the RLC's cp_table buffer.  The buffer
 * must have been mapped (rdev->rlc.cp_table_ptr) before calling. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;	/* number of micro engines with a table */
	u32 bo_offset = 0;	/* running dword offset into the buffer */
	u32 table_offset;	/* table location (dwords) within the fw image */

	/* KV carries an extra MEC table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the source fw image for this micro engine */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			/* me >= 3: MEC image(s) */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* fw words are big endian; the buffer is little endian */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
5582
/* Enable/disable coarse grain gfx powergating and the RLC auto
 * powergate feature that drives it. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded — looks like a posting
		 * read to make the disable take effect; confirm intent */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
5612
5613 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5614 {
5615         u32 mask = 0, tmp, tmp1;
5616         int i;
5617
5618         cik_select_se_sh(rdev, se, sh);
5619         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5620         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5621         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5622
5623         tmp &= 0xffff0000;
5624
5625         tmp |= tmp1;
5626         tmp >>= 16;
5627
5628         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
5629                 mask <<= 1;
5630                 mask |= 1;
5631         }
5632
5633         return (~tmp) & mask;
5634 }
5635
5636 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5637 {
5638         u32 i, j, k, active_cu_number = 0;
5639         u32 mask, counter, cu_bitmap;
5640         u32 tmp = 0;
5641
5642         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5643                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5644                         mask = 1;
5645                         cu_bitmap = 0;
5646                         counter = 0;
5647                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
5648                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5649                                         if (counter < 2)
5650                                                 cu_bitmap |= mask;
5651                                         counter ++;
5652                                 }
5653                                 mask <<= 1;
5654                         }
5655
5656                         active_cu_number += counter;
5657                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5658                 }
5659         }
5660
5661         WREG32(RLC_PG_AO_CU_MASK, tmp);
5662
5663         tmp = RREG32(RLC_MAX_PG_CU);
5664         tmp &= ~MAX_PU_CU_MASK;
5665         tmp |= MAX_PU_CU(active_cu_number);
5666         WREG32(RLC_MAX_PG_CU, tmp);
5667 }
5668
5669 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5670                                        bool enable)
5671 {
5672         u32 data, orig;
5673
5674         orig = data = RREG32(RLC_PG_CNTL);
5675         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5676                 data |= STATIC_PER_CU_PG_ENABLE;
5677         else
5678                 data &= ~STATIC_PER_CU_PG_ENABLE;
5679         if (orig != data)
5680                 WREG32(RLC_PG_CNTL, data);
5681 }
5682
5683 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5684                                         bool enable)
5685 {
5686         u32 data, orig;
5687
5688         orig = data = RREG32(RLC_PG_CNTL);
5689         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5690                 data |= DYN_PER_CU_PG_ENABLE;
5691         else
5692                 data &= ~DYN_PER_CU_PG_ENABLE;
5693         if (orig != data)
5694                 WREG32(RLC_PG_CNTL, data);
5695 }
5696
/* dword offsets into the RLC GPM scratch space */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC state needed for coarse grain gfx powergating:
 * the clear-state descriptor, the save/restore register list, the
 * buffer addresses and the various powergate timing parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* point the RLC at the clear state buffer: hi addr,
		 * lo addr, then size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* let the RLC source the gfx powergate state */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* NOTE(review): magic powergate delay values — presumably from
	 * hw team recommendations; confirm before changing */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
5748
/* Toggle all three gfx powergating modes (coarse grain, static per-CU,
 * dynamic per-CU) together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
5755
5756 u32 cik_get_csb_size(struct radeon_device *rdev)
5757 {
5758         u32 count = 0;
5759         const struct cs_section_def *sect = NULL;
5760         const struct cs_extent_def *ext = NULL;
5761
5762         if (rdev->rlc.cs_data == NULL)
5763                 return 0;
5764
5765         /* begin clear state */
5766         count += 2;
5767         /* context control state */
5768         count += 3;
5769
5770         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5771                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5772                         if (sect->id == SECT_CONTEXT)
5773                                 count += 2 + ext->reg_count;
5774                         else
5775                                 return 0;
5776                 }
5777         }
5778         /* pa_sc_raster_config/pa_sc_raster_config1 */
5779         count += 4;
5780         /* end clear state */
5781         count += 2;
5782         /* clear state */
5783         count += 2;
5784
5785         return count;
5786 }
5787
/* Fill @buffer with the clear state packet stream described by
 * rdev->rlc.cs_data; the buffer must hold cik_get_csb_size() dwords.
 * All values are written little endian as the CP consumes them. */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state preamble */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword address; 0xa000 is the
				 * context register base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* asic specific pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state preamble */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* emit the clear state itself */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5847
5848 static void cik_init_pg(struct radeon_device *rdev)
5849 {
5850         if (rdev->pg_flags) {
5851                 cik_enable_sck_slowdown_on_pu(rdev, true);
5852                 cik_enable_sck_slowdown_on_pd(rdev, true);
5853                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5854                         cik_init_gfx_cgpg(rdev);
5855                         cik_enable_cp_pg(rdev, true);
5856                         cik_enable_gds_pg(rdev, true);
5857                 }
5858                 cik_init_ao_cu_mask(rdev);
5859                 cik_update_gfx_pg(rdev, true);
5860         }
5861 }
5862
5863 static void cik_fini_pg(struct radeon_device *rdev)
5864 {
5865         if (rdev->pg_flags) {
5866                 cik_update_gfx_pg(rdev, false);
5867                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5868                         cik_enable_cp_pg(rdev, false);
5869                         cik_enable_gds_pg(rdev, false);
5870                 }
5871         }
5872 }
5873
5874 /*
5875  * Interrupts
5876  * Starting with r6xx, interrupts are handled via a ring buffer.
5877  * Ring buffers are areas of GPU accessible memory that the GPU
5878  * writes interrupt vectors into and the host reads vectors out of.
5879  * There is a rptr (read pointer) that determines where the
5880  * host is currently reading, and a wptr (write pointer)
5881  * which determines where the GPU has written.  When the
5882  * pointers are equal, the ring is idle.  When the GPU
5883  * writes vectors to the ring buffer, it increments the
5884  * wptr.  When there is an interrupt, the host then starts
5885  * fetching commands and processing them until the pointers are
5886  * equal again at which point it updates the rptr.
5887  */
5888
5889 /**
5890  * cik_enable_interrupts - Enable the interrupt ring buffer
5891  *
5892  * @rdev: radeon_device pointer
5893  *
5894  * Enable the interrupt ring buffer (CIK).
5895  */
5896 static void cik_enable_interrupts(struct radeon_device *rdev)
5897 {
5898         u32 ih_cntl = RREG32(IH_CNTL);
5899         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5900
5901         ih_cntl |= ENABLE_INTR;
5902         ih_rb_cntl |= IH_RB_ENABLE;
5903         WREG32(IH_CNTL, ih_cntl);
5904         WREG32(IH_RB_CNTL, ih_rb_cntl);
5905         rdev->ih.enabled = true;
5906 }
5907
5908 /**
5909  * cik_disable_interrupts - Disable the interrupt ring buffer
5910  *
5911  * @rdev: radeon_device pointer
5912  *
5913  * Disable the interrupt ring buffer (CIK).
5914  */
5915 static void cik_disable_interrupts(struct radeon_device *rdev)
5916 {
5917         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5918         u32 ih_cntl = RREG32(IH_CNTL);
5919
5920         ih_rb_cntl &= ~IH_RB_ENABLE;
5921         ih_cntl &= ~ENABLE_INTR;
5922         WREG32(IH_RB_CNTL, ih_rb_cntl);
5923         WREG32(IH_CNTL, ih_cntl);
5924         /* set rptr, wptr to 0 */
5925         WREG32(IH_RB_RPTR, 0);
5926         WREG32(IH_RB_WPTR, 0);
5927         rdev->ih.enabled = false;
5928         rdev->ih.rptr = 0;
5929 }
5930
5931 /**
5932  * cik_disable_interrupt_state - Disable all interrupt sources
5933  *
5934  * @rdev: radeon_device pointer
5935  *
5936  * Clear all interrupt enable bits used by the driver (CIK).
5937  */
5938 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5939 {
5940         u32 tmp;
5941
5942         /* gfx ring */
5943         tmp = RREG32(CP_INT_CNTL_RING0) &
5944                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5945         WREG32(CP_INT_CNTL_RING0, tmp);
5946         /* sdma */
5947         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5948         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5949         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5950         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5951         /* compute queues */
5952         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5953         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5954         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5955         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5956         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5957         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5958         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5959         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5960         /* grbm */
5961         WREG32(GRBM_INT_CNTL, 0);
5962         /* vline/vblank, etc. */
5963         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5964         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5965         if (rdev->num_crtc >= 4) {
5966                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5967                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5968         }
5969         if (rdev->num_crtc >= 6) {
5970                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5971                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5972         }
5973
5974         /* dac hotplug */
5975         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5976
5977         /* digital hotplug */
5978         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5979         WREG32(DC_HPD1_INT_CONTROL, tmp);
5980         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5981         WREG32(DC_HPD2_INT_CONTROL, tmp);
5982         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5983         WREG32(DC_HPD3_INT_CONTROL, tmp);
5984         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5985         WREG32(DC_HPD4_INT_CONTROL, tmp);
5986         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5987         WREG32(DC_HPD5_INT_CONTROL, tmp);
5988         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5989         WREG32(DC_HPD6_INT_CONTROL, tmp);
5990
5991 }
5992
5993 /**
5994  * cik_irq_init - init and enable the interrupt ring
5995  *
5996  * @rdev: radeon_device pointer
5997  *
5998  * Allocate a ring buffer for the interrupt controller,
5999  * enable the RLC, disable interrupts, enable the IH
6000  * ring buffer and enable it (CIK).
6001  * Called at device load and reume.
6002  * Returns 0 for success, errors for failure.
6003  */
6004 static int cik_irq_init(struct radeon_device *rdev)
6005 {
6006         int ret = 0;
6007         int rb_bufsz;
6008         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6009
6010         /* allocate ring */
6011         ret = r600_ih_ring_alloc(rdev);
6012         if (ret)
6013                 return ret;
6014
6015         /* disable irqs */
6016         cik_disable_interrupts(rdev);
6017
6018         /* init rlc */
6019         ret = cik_rlc_resume(rdev);
6020         if (ret) {
6021                 r600_ih_ring_fini(rdev);
6022                 return ret;
6023         }
6024
6025         /* setup interrupt control */
6026         /* XXX this should actually be a bus address, not an MC address. same on older asics */
6027         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6028         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6029         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6030          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6031          */
6032         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6033         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6034         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6035         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6036
6037         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6038         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6039
6040         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6041                       IH_WPTR_OVERFLOW_CLEAR |
6042                       (rb_bufsz << 1));
6043
6044         if (rdev->wb.enabled)
6045                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6046
6047         /* set the writeback address whether it's enabled or not */
6048         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6049         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6050
6051         WREG32(IH_RB_CNTL, ih_rb_cntl);
6052
6053         /* set rptr, wptr to 0 */
6054         WREG32(IH_RB_RPTR, 0);
6055         WREG32(IH_RB_WPTR, 0);
6056
6057         /* Default settings for IH_CNTL (disabled at first) */
6058         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6059         /* RPTR_REARM only works if msi's are enabled */
6060         if (rdev->msi_enabled)
6061                 ih_cntl |= RPTR_REARM;
6062         WREG32(IH_CNTL, ih_cntl);
6063
6064         /* force the active interrupt state to all disabled */
6065         cik_disable_interrupt_state(rdev);
6066
6067         pci_set_master(rdev->pdev);
6068
6069         /* enable irqs */
6070         cik_enable_interrupts(rdev);
6071
6072         return ret;
6073 }
6074
6075 /**
6076  * cik_irq_set - enable/disable interrupt sources
6077  *
6078  * @rdev: radeon_device pointer
6079  *
6080  * Enable interrupt sources on the GPU (vblanks, hpd,
6081  * etc.) (CIK).
6082  * Returns 0 for success, errors for failure.
6083  */
6084 int cik_irq_set(struct radeon_device *rdev)
6085 {
6086         u32 cp_int_cntl;
6087         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6088         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6089         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6090         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6091         u32 grbm_int_cntl = 0;
6092         u32 dma_cntl, dma_cntl1;
6093         u32 thermal_int;
6094
6095         if (!rdev->irq.installed) {
6096                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6097                 return -EINVAL;
6098         }
6099         /* don't enable anything if the ih is disabled */
6100         if (!rdev->ih.enabled) {
6101                 cik_disable_interrupts(rdev);
6102                 /* force the active interrupt state to all disabled */
6103                 cik_disable_interrupt_state(rdev);
6104                 return 0;
6105         }
6106
6107         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6108                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6109         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6110
6111         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6112         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6113         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6114         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6115         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6116         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6117
6118         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6119         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6120
6121         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6122         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6123         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6124         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6125         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6126         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6127         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6128         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6129
6130         if (rdev->flags & RADEON_IS_IGP)
6131                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6132                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
6133         else
6134                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6135                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6136
6137         /* enable CP interrupts on all rings */
6138         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6139                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6140                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6141         }
6142         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6143                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6144                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6145                 if (ring->me == 1) {
6146                         switch (ring->pipe) {
6147                         case 0:
6148                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6149                                 break;
6150                         case 1:
6151                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6152                                 break;
6153                         case 2:
6154                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6155                                 break;
6156                         case 3:
6157                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6158                                 break;
6159                         default:
6160                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6161                                 break;
6162                         }
6163                 } else if (ring->me == 2) {
6164                         switch (ring->pipe) {
6165                         case 0:
6166                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6167                                 break;
6168                         case 1:
6169                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6170                                 break;
6171                         case 2:
6172                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6173                                 break;
6174                         case 3:
6175                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6176                                 break;
6177                         default:
6178                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6179                                 break;
6180                         }
6181                 } else {
6182                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6183                 }
6184         }
6185         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6186                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6187                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6188                 if (ring->me == 1) {
6189                         switch (ring->pipe) {
6190                         case 0:
6191                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6192                                 break;
6193                         case 1:
6194                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6195                                 break;
6196                         case 2:
6197                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6198                                 break;
6199                         case 3:
6200                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6201                                 break;
6202                         default:
6203                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6204                                 break;
6205                         }
6206                 } else if (ring->me == 2) {
6207                         switch (ring->pipe) {
6208                         case 0:
6209                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6210                                 break;
6211                         case 1:
6212                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6213                                 break;
6214                         case 2:
6215                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6216                                 break;
6217                         case 3:
6218                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6219                                 break;
6220                         default:
6221                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6222                                 break;
6223                         }
6224                 } else {
6225                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6226                 }
6227         }
6228
6229         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6230                 DRM_DEBUG("cik_irq_set: sw int dma\n");
6231                 dma_cntl |= TRAP_ENABLE;
6232         }
6233
6234         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6235                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6236                 dma_cntl1 |= TRAP_ENABLE;
6237         }
6238
6239         if (rdev->irq.crtc_vblank_int[0] ||
6240             atomic_read(&rdev->irq.pflip[0])) {
6241                 DRM_DEBUG("cik_irq_set: vblank 0\n");
6242                 crtc1 |= VBLANK_INTERRUPT_MASK;
6243         }
6244         if (rdev->irq.crtc_vblank_int[1] ||
6245             atomic_read(&rdev->irq.pflip[1])) {
6246                 DRM_DEBUG("cik_irq_set: vblank 1\n");
6247                 crtc2 |= VBLANK_INTERRUPT_MASK;
6248         }
6249         if (rdev->irq.crtc_vblank_int[2] ||
6250             atomic_read(&rdev->irq.pflip[2])) {
6251                 DRM_DEBUG("cik_irq_set: vblank 2\n");
6252                 crtc3 |= VBLANK_INTERRUPT_MASK;
6253         }
6254         if (rdev->irq.crtc_vblank_int[3] ||
6255             atomic_read(&rdev->irq.pflip[3])) {
6256                 DRM_DEBUG("cik_irq_set: vblank 3\n");
6257                 crtc4 |= VBLANK_INTERRUPT_MASK;
6258         }
6259         if (rdev->irq.crtc_vblank_int[4] ||
6260             atomic_read(&rdev->irq.pflip[4])) {
6261                 DRM_DEBUG("cik_irq_set: vblank 4\n");
6262                 crtc5 |= VBLANK_INTERRUPT_MASK;
6263         }
6264         if (rdev->irq.crtc_vblank_int[5] ||
6265             atomic_read(&rdev->irq.pflip[5])) {
6266                 DRM_DEBUG("cik_irq_set: vblank 5\n");
6267                 crtc6 |= VBLANK_INTERRUPT_MASK;
6268         }
6269         if (rdev->irq.hpd[0]) {
6270                 DRM_DEBUG("cik_irq_set: hpd 1\n");
6271                 hpd1 |= DC_HPDx_INT_EN;
6272         }
6273         if (rdev->irq.hpd[1]) {
6274                 DRM_DEBUG("cik_irq_set: hpd 2\n");
6275                 hpd2 |= DC_HPDx_INT_EN;
6276         }
6277         if (rdev->irq.hpd[2]) {
6278                 DRM_DEBUG("cik_irq_set: hpd 3\n");
6279                 hpd3 |= DC_HPDx_INT_EN;
6280         }
6281         if (rdev->irq.hpd[3]) {
6282                 DRM_DEBUG("cik_irq_set: hpd 4\n");
6283                 hpd4 |= DC_HPDx_INT_EN;
6284         }
6285         if (rdev->irq.hpd[4]) {
6286                 DRM_DEBUG("cik_irq_set: hpd 5\n");
6287                 hpd5 |= DC_HPDx_INT_EN;
6288         }
6289         if (rdev->irq.hpd[5]) {
6290                 DRM_DEBUG("cik_irq_set: hpd 6\n");
6291                 hpd6 |= DC_HPDx_INT_EN;
6292         }
6293
6294         if (rdev->irq.dpm_thermal) {
6295                 DRM_DEBUG("dpm thermal\n");
6296                 if (rdev->flags & RADEON_IS_IGP)
6297                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6298                 else
6299                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6300         }
6301
6302         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6303
6304         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6305         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6306
6307         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6308         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6309         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6310         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6311         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6312         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6313         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6314         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6315
6316         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6317
6318         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6319         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6320         if (rdev->num_crtc >= 4) {
6321                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6322                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6323         }
6324         if (rdev->num_crtc >= 6) {
6325                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6326                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6327         }
6328
6329         WREG32(DC_HPD1_INT_CONTROL, hpd1);
6330         WREG32(DC_HPD2_INT_CONTROL, hpd2);
6331         WREG32(DC_HPD3_INT_CONTROL, hpd3);
6332         WREG32(DC_HPD4_INT_CONTROL, hpd4);
6333         WREG32(DC_HPD5_INT_CONTROL, hpd5);
6334         WREG32(DC_HPD6_INT_CONTROL, hpd6);
6335
6336         if (rdev->flags & RADEON_IS_IGP)
6337                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6338         else
6339                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6340
6341         return 0;
6342 }
6343
6344 /**
6345  * cik_irq_ack - ack interrupt sources
6346  *
6347  * @rdev: radeon_device pointer
6348  *
6349  * Ack interrupt sources on the GPU (vblanks, hpd,
6350  * etc.) (CIK).  Certain interrupts sources are sw
6351  * generated and do not require an explicit ack.
6352  */
6353 static inline void cik_irq_ack(struct radeon_device *rdev)
6354 {
6355         u32 tmp;
6356
6357         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6358         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6359         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6360         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6361         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6362         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6363         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6364
6365         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6366                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6367         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6368                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6369         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6370                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6371         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6372                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6373
6374         if (rdev->num_crtc >= 4) {
6375                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6376                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6377                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6378                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6379                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6380                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6381                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6382                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6383         }
6384
6385         if (rdev->num_crtc >= 6) {
6386                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6387                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6388                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6389                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6390                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6391                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6392                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6393                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6394         }
6395
6396         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6397                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6398                 tmp |= DC_HPDx_INT_ACK;
6399                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6400         }
6401         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6402                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6403                 tmp |= DC_HPDx_INT_ACK;
6404                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6405         }
6406         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6407                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6408                 tmp |= DC_HPDx_INT_ACK;
6409                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6410         }
6411         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6412                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6413                 tmp |= DC_HPDx_INT_ACK;
6414                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6415         }
6416         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6417                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6418                 tmp |= DC_HPDx_INT_ACK;
6419                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6420         }
6421         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6422                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6423                 tmp |= DC_HPDx_INT_ACK;
6424                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6425         }
6426 }
6427
6428 /**
6429  * cik_irq_disable - disable interrupts
6430  *
6431  * @rdev: radeon_device pointer
6432  *
6433  * Disable interrupts on the hw (CIK).
6434  */
6435 static void cik_irq_disable(struct radeon_device *rdev)
6436 {
6437         cik_disable_interrupts(rdev);
6438         /* Wait and acknowledge irq */
6439         mdelay(1);
6440         cik_irq_ack(rdev);
6441         cik_disable_interrupt_state(rdev);
6442 }
6443
6444 /**
6445  * cik_irq_disable - disable interrupts for suspend
6446  *
6447  * @rdev: radeon_device pointer
6448  *
6449  * Disable interrupts and stop the RLC (CIK).
6450  * Used for suspend.
6451  */
6452 static void cik_irq_suspend(struct radeon_device *rdev)
6453 {
6454         cik_irq_disable(rdev);
6455         cik_rlc_stop(rdev);
6456 }
6457
6458 /**
6459  * cik_irq_fini - tear down interrupt support
6460  *
6461  * @rdev: radeon_device pointer
6462  *
6463  * Disable interrupts on the hw and free the IH ring
6464  * buffer (CIK).
6465  * Used for driver unload.
6466  */
6467 static void cik_irq_fini(struct radeon_device *rdev)
6468 {
6469         cik_irq_suspend(rdev);
6470         r600_ih_ring_fini(rdev);
6471 }
6472
6473 /**
6474  * cik_get_ih_wptr - get the IH ring buffer wptr
6475  *
6476  * @rdev: radeon_device pointer
6477  *
6478  * Get the IH ring buffer wptr from either the register
6479  * or the writeback memory buffer (CIK).  Also check for
6480  * ring buffer overflow and deal with it.
6481  * Used by cik_irq_process().
6482  * Returns the value of the wptr.
6483  */
6484 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6485 {
6486         u32 wptr, tmp;
6487
6488         if (rdev->wb.enabled)
6489                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6490         else
6491                 wptr = RREG32(IH_RB_WPTR);
6492
6493         if (wptr & RB_OVERFLOW) {
6494                 /* When a ring buffer overflow happen start parsing interrupt
6495                  * from the last not overwritten vector (wptr + 16). Hopefully
6496                  * this should allow us to catchup.
6497                  */
6498                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6499                         wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
6500                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6501                 tmp = RREG32(IH_RB_CNTL);
6502                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6503                 WREG32(IH_RB_CNTL, tmp);
6504         }
6505         return (wptr & rdev->ih.ptr_mask);
6506 }
6507
6508 /*        CIK IV Ring
6509  * Each IV ring entry is 128 bits:
6510  * [7:0]    - interrupt source id
6511  * [31:8]   - reserved
6512  * [59:32]  - interrupt source data
6513  * [63:60]  - reserved
6514  * [71:64]  - RINGID
6515  *            CP:
6516  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6517  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6518  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6519  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6520  *            PIPE_ID - ME0 0=3D
6521  *                    - ME1&2 compute dispatcher (4 pipes each)
6522  *            SDMA:
6523  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6524  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6525  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6526  * [79:72]  - VMID
6527  * [95:80]  - PASID
6528  * [127:96] - reserved
6529  */
6530 /**
6531  * cik_irq_process - interrupt handler
6532  *
6533  * @rdev: radeon_device pointer
6534  *
6535  * Interrupt hander (CIK).  Walk the IH ring,
6536  * ack interrupts and schedule work to handle
6537  * interrupt events.
6538  * Returns irq process return code.
6539  */
6540 int cik_irq_process(struct radeon_device *rdev)
6541 {
6542         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6543         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6544         u32 wptr;
6545         u32 rptr;
6546         u32 src_id, src_data, ring_id;
6547         u8 me_id, pipe_id, queue_id;
6548         u32 ring_index;
6549         bool queue_hotplug = false;
6550         bool queue_reset = false;
6551         u32 addr, status, mc_client;
6552         bool queue_thermal = false;
6553
6554         if (!rdev->ih.enabled || rdev->shutdown)
6555                 return IRQ_NONE;
6556
6557         wptr = cik_get_ih_wptr(rdev);
6558
6559 restart_ih:
6560         /* is somebody else already processing irqs? */
6561         if (atomic_xchg(&rdev->ih.lock, 1))
6562                 return IRQ_NONE;
6563
6564         rptr = rdev->ih.rptr;
6565         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6566
6567         /* Order reading of wptr vs. reading of IH ring data */
6568         rmb();
6569
6570         /* display interrupts */
6571         cik_irq_ack(rdev);
6572
6573         while (rptr != wptr) {
6574                 /* wptr/rptr are in bytes! */
6575                 ring_index = rptr / 4;
6576                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6577                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6578                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6579
6580                 switch (src_id) {
6581                 case 1: /* D1 vblank/vline */
6582                         switch (src_data) {
6583                         case 0: /* D1 vblank */
6584                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6585                                         if (rdev->irq.crtc_vblank_int[0]) {
6586                                                 drm_handle_vblank(rdev->ddev, 0);
6587                                                 rdev->pm.vblank_sync = true;
6588                                                 wake_up(&rdev->irq.vblank_queue);
6589                                         }
6590                                         if (atomic_read(&rdev->irq.pflip[0]))
6591                                                 radeon_crtc_handle_flip(rdev, 0);
6592                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6593                                         DRM_DEBUG("IH: D1 vblank\n");
6594                                 }
6595                                 break;
6596                         case 1: /* D1 vline */
6597                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6598                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6599                                         DRM_DEBUG("IH: D1 vline\n");
6600                                 }
6601                                 break;
6602                         default:
6603                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6604                                 break;
6605                         }
6606                         break;
6607                 case 2: /* D2 vblank/vline */
6608                         switch (src_data) {
6609                         case 0: /* D2 vblank */
6610                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6611                                         if (rdev->irq.crtc_vblank_int[1]) {
6612                                                 drm_handle_vblank(rdev->ddev, 1);
6613                                                 rdev->pm.vblank_sync = true;
6614                                                 wake_up(&rdev->irq.vblank_queue);
6615                                         }
6616                                         if (atomic_read(&rdev->irq.pflip[1]))
6617                                                 radeon_crtc_handle_flip(rdev, 1);
6618                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6619                                         DRM_DEBUG("IH: D2 vblank\n");
6620                                 }
6621                                 break;
6622                         case 1: /* D2 vline */
6623                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6624                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6625                                         DRM_DEBUG("IH: D2 vline\n");
6626                                 }
6627                                 break;
6628                         default:
6629                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6630                                 break;
6631                         }
6632                         break;
6633                 case 3: /* D3 vblank/vline */
6634                         switch (src_data) {
6635                         case 0: /* D3 vblank */
6636                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6637                                         if (rdev->irq.crtc_vblank_int[2]) {
6638                                                 drm_handle_vblank(rdev->ddev, 2);
6639                                                 rdev->pm.vblank_sync = true;
6640                                                 wake_up(&rdev->irq.vblank_queue);
6641                                         }
6642                                         if (atomic_read(&rdev->irq.pflip[2]))
6643                                                 radeon_crtc_handle_flip(rdev, 2);
6644                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6645                                         DRM_DEBUG("IH: D3 vblank\n");
6646                                 }
6647                                 break;
6648                         case 1: /* D3 vline */
6649                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6650                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6651                                         DRM_DEBUG("IH: D3 vline\n");
6652                                 }
6653                                 break;
6654                         default:
6655                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6656                                 break;
6657                         }
6658                         break;
6659                 case 4: /* D4 vblank/vline */
6660                         switch (src_data) {
6661                         case 0: /* D4 vblank */
6662                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6663                                         if (rdev->irq.crtc_vblank_int[3]) {
6664                                                 drm_handle_vblank(rdev->ddev, 3);
6665                                                 rdev->pm.vblank_sync = true;
6666                                                 wake_up(&rdev->irq.vblank_queue);
6667                                         }
6668                                         if (atomic_read(&rdev->irq.pflip[3]))
6669                                                 radeon_crtc_handle_flip(rdev, 3);
6670                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6671                                         DRM_DEBUG("IH: D4 vblank\n");
6672                                 }
6673                                 break;
6674                         case 1: /* D4 vline */
6675                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6676                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6677                                         DRM_DEBUG("IH: D4 vline\n");
6678                                 }
6679                                 break;
6680                         default:
6681                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6682                                 break;
6683                         }
6684                         break;
6685                 case 5: /* D5 vblank/vline */
6686                         switch (src_data) {
6687                         case 0: /* D5 vblank */
6688                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6689                                         if (rdev->irq.crtc_vblank_int[4]) {
6690                                                 drm_handle_vblank(rdev->ddev, 4);
6691                                                 rdev->pm.vblank_sync = true;
6692                                                 wake_up(&rdev->irq.vblank_queue);
6693                                         }
6694                                         if (atomic_read(&rdev->irq.pflip[4]))
6695                                                 radeon_crtc_handle_flip(rdev, 4);
6696                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6697                                         DRM_DEBUG("IH: D5 vblank\n");
6698                                 }
6699                                 break;
6700                         case 1: /* D5 vline */
6701                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6702                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6703                                         DRM_DEBUG("IH: D5 vline\n");
6704                                 }
6705                                 break;
6706                         default:
6707                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6708                                 break;
6709                         }
6710                         break;
6711                 case 6: /* D6 vblank/vline */
6712                         switch (src_data) {
6713                         case 0: /* D6 vblank */
6714                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6715                                         if (rdev->irq.crtc_vblank_int[5]) {
6716                                                 drm_handle_vblank(rdev->ddev, 5);
6717                                                 rdev->pm.vblank_sync = true;
6718                                                 wake_up(&rdev->irq.vblank_queue);
6719                                         }
6720                                         if (atomic_read(&rdev->irq.pflip[5]))
6721                                                 radeon_crtc_handle_flip(rdev, 5);
6722                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6723                                         DRM_DEBUG("IH: D6 vblank\n");
6724                                 }
6725                                 break;
6726                         case 1: /* D6 vline */
6727                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6728                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6729                                         DRM_DEBUG("IH: D6 vline\n");
6730                                 }
6731                                 break;
6732                         default:
6733                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6734                                 break;
6735                         }
6736                         break;
6737                 case 42: /* HPD hotplug */
6738                         switch (src_data) {
6739                         case 0:
6740                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6741                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6742                                         queue_hotplug = true;
6743                                         DRM_DEBUG("IH: HPD1\n");
6744                                 }
6745                                 break;
6746                         case 1:
6747                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6748                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6749                                         queue_hotplug = true;
6750                                         DRM_DEBUG("IH: HPD2\n");
6751                                 }
6752                                 break;
6753                         case 2:
6754                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6755                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6756                                         queue_hotplug = true;
6757                                         DRM_DEBUG("IH: HPD3\n");
6758                                 }
6759                                 break;
6760                         case 3:
6761                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6762                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6763                                         queue_hotplug = true;
6764                                         DRM_DEBUG("IH: HPD4\n");
6765                                 }
6766                                 break;
6767                         case 4:
6768                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6769                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6770                                         queue_hotplug = true;
6771                                         DRM_DEBUG("IH: HPD5\n");
6772                                 }
6773                                 break;
6774                         case 5:
6775                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6776                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6777                                         queue_hotplug = true;
6778                                         DRM_DEBUG("IH: HPD6\n");
6779                                 }
6780                                 break;
6781                         default:
6782                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6783                                 break;
6784                         }
6785                         break;
6786                 case 124: /* UVD */
6787                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6788                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6789                         break;
6790                 case 146:
6791                 case 147:
6792                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6793                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6794                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6795                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6796                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6797                                 addr);
6798                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6799                                 status);
6800                         cik_vm_decode_fault(rdev, status, addr, mc_client);
6801                         /* reset addr and status */
6802                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6803                         break;
6804                 case 176: /* GFX RB CP_INT */
6805                 case 177: /* GFX IB CP_INT */
6806                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6807                         break;
6808                 case 181: /* CP EOP event */
6809                         DRM_DEBUG("IH: CP EOP\n");
6810                         /* XXX check the bitfield order! */
6811                         me_id = (ring_id & 0x60) >> 5;
6812                         pipe_id = (ring_id & 0x18) >> 3;
6813                         queue_id = (ring_id & 0x7) >> 0;
6814                         switch (me_id) {
6815                         case 0:
6816                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6817                                 break;
6818                         case 1:
6819                         case 2:
6820                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
6821                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6822                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
6823                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6824                                 break;
6825                         }
6826                         break;
6827                 case 184: /* CP Privileged reg access */
6828                         DRM_ERROR("Illegal register access in command stream\n");
6829                         /* XXX check the bitfield order! */
6830                         me_id = (ring_id & 0x60) >> 5;
6831                         pipe_id = (ring_id & 0x18) >> 3;
6832                         queue_id = (ring_id & 0x7) >> 0;
6833                         switch (me_id) {
6834                         case 0:
6835                                 /* This results in a full GPU reset, but all we need to do is soft
6836                                  * reset the CP for gfx
6837                                  */
6838                                 queue_reset = true;
6839                                 break;
6840                         case 1:
6841                                 /* XXX compute */
6842                                 queue_reset = true;
6843                                 break;
6844                         case 2:
6845                                 /* XXX compute */
6846                                 queue_reset = true;
6847                                 break;
6848                         }
6849                         break;
6850                 case 185: /* CP Privileged inst */
6851                         DRM_ERROR("Illegal instruction in command stream\n");
6852                         /* XXX check the bitfield order! */
6853                         me_id = (ring_id & 0x60) >> 5;
6854                         pipe_id = (ring_id & 0x18) >> 3;
6855                         queue_id = (ring_id & 0x7) >> 0;
6856                         switch (me_id) {
6857                         case 0:
6858                                 /* This results in a full GPU reset, but all we need to do is soft
6859                                  * reset the CP for gfx
6860                                  */
6861                                 queue_reset = true;
6862                                 break;
6863                         case 1:
6864                                 /* XXX compute */
6865                                 queue_reset = true;
6866                                 break;
6867                         case 2:
6868                                 /* XXX compute */
6869                                 queue_reset = true;
6870                                 break;
6871                         }
6872                         break;
6873                 case 224: /* SDMA trap event */
6874                         /* XXX check the bitfield order! */
6875                         me_id = (ring_id & 0x3) >> 0;
6876                         queue_id = (ring_id & 0xc) >> 2;
6877                         DRM_DEBUG("IH: SDMA trap\n");
6878                         switch (me_id) {
6879                         case 0:
6880                                 switch (queue_id) {
6881                                 case 0:
6882                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6883                                         break;
6884                                 case 1:
6885                                         /* XXX compute */
6886                                         break;
6887                                 case 2:
6888                                         /* XXX compute */
6889                                         break;
6890                                 }
6891                                 break;
6892                         case 1:
6893                                 switch (queue_id) {
6894                                 case 0:
6895                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6896                                         break;
6897                                 case 1:
6898                                         /* XXX compute */
6899                                         break;
6900                                 case 2:
6901                                         /* XXX compute */
6902                                         break;
6903                                 }
6904                                 break;
6905                         }
6906                         break;
6907                 case 230: /* thermal low to high */
6908                         DRM_DEBUG("IH: thermal low to high\n");
6909                         rdev->pm.dpm.thermal.high_to_low = false;
6910                         queue_thermal = true;
6911                         break;
6912                 case 231: /* thermal high to low */
6913                         DRM_DEBUG("IH: thermal high to low\n");
6914                         rdev->pm.dpm.thermal.high_to_low = true;
6915                         queue_thermal = true;
6916                         break;
6917                 case 233: /* GUI IDLE */
6918                         DRM_DEBUG("IH: GUI idle\n");
6919                         break;
6920                 case 241: /* SDMA Privileged inst */
6921                 case 247: /* SDMA Privileged inst */
6922                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
6923                         /* XXX check the bitfield order! */
6924                         me_id = (ring_id & 0x3) >> 0;
6925                         queue_id = (ring_id & 0xc) >> 2;
6926                         switch (me_id) {
6927                         case 0:
6928                                 switch (queue_id) {
6929                                 case 0:
6930                                         queue_reset = true;
6931                                         break;
6932                                 case 1:
6933                                         /* XXX compute */
6934                                         queue_reset = true;
6935                                         break;
6936                                 case 2:
6937                                         /* XXX compute */
6938                                         queue_reset = true;
6939                                         break;
6940                                 }
6941                                 break;
6942                         case 1:
6943                                 switch (queue_id) {
6944                                 case 0:
6945                                         queue_reset = true;
6946                                         break;
6947                                 case 1:
6948                                         /* XXX compute */
6949                                         queue_reset = true;
6950                                         break;
6951                                 case 2:
6952                                         /* XXX compute */
6953                                         queue_reset = true;
6954                                         break;
6955                                 }
6956                                 break;
6957                         }
6958                         break;
6959                 default:
6960                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6961                         break;
6962                 }
6963
6964                 /* wptr/rptr are in bytes! */
6965                 rptr += 16;
6966                 rptr &= rdev->ih.ptr_mask;
6967         }
6968         if (queue_hotplug)
6969                 schedule_work(&rdev->hotplug_work);
6970         if (queue_reset)
6971                 schedule_work(&rdev->reset_work);
6972         if (queue_thermal)
6973                 schedule_work(&rdev->pm.dpm.thermal.work);
6974         rdev->ih.rptr = rptr;
6975         WREG32(IH_RB_RPTR, rdev->ih.rptr);
6976         atomic_set(&rdev->ih.lock, 0);
6977
6978         /* make sure wptr hasn't changed while processing */
6979         wptr = cik_get_ih_wptr(rdev);
6980         if (wptr != rptr)
6981                 goto restart_ih;
6982
6983         return IRQ_HANDLED;
6984 }
6985
6986 /*
6987  * startup/shutdown callbacks
6988  */
6989 /**
6990  * cik_startup - program the asic to a functional state
6991  *
6992  * @rdev: radeon_device pointer
6993  *
6994  * Programs the asic to a functional state (CIK).
6995  * Called by cik_init() and cik_resume().
6996  * Returns 0 for success, error for failure.
6997  */
6998 static int cik_startup(struct radeon_device *rdev)
6999 {
7000         struct radeon_ring *ring;
7001         int r;
7002
7003         /* enable pcie gen2/3 link */
7004         cik_pcie_gen3_enable(rdev);
7005         /* enable aspm */
7006         cik_program_aspm(rdev);
7007
7008         /* scratch needs to be initialized before MC */
7009         r = r600_vram_scratch_init(rdev);
7010         if (r)
7011                 return r;
7012
7013         cik_mc_program(rdev);
7014
7015         if (rdev->flags & RADEON_IS_IGP) {
7016                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7017                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7018                         r = cik_init_microcode(rdev);
7019                         if (r) {
7020                                 DRM_ERROR("Failed to load firmware!\n");
7021                                 return r;
7022                         }
7023                 }
7024         } else {
7025                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7026                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7027                     !rdev->mc_fw) {
7028                         r = cik_init_microcode(rdev);
7029                         if (r) {
7030                                 DRM_ERROR("Failed to load firmware!\n");
7031                                 return r;
7032                         }
7033                 }
7034
7035                 r = ci_mc_load_microcode(rdev);
7036                 if (r) {
7037                         DRM_ERROR("Failed to load MC firmware!\n");
7038                         return r;
7039                 }
7040         }
7041
7042         r = cik_pcie_gart_enable(rdev);
7043         if (r)
7044                 return r;
7045         cik_gpu_init(rdev);
7046
7047         /* allocate rlc buffers */
7048         if (rdev->flags & RADEON_IS_IGP) {
7049                 if (rdev->family == CHIP_KAVERI) {
7050                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7051                         rdev->rlc.reg_list_size =
7052                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7053                 } else {
7054                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7055                         rdev->rlc.reg_list_size =
7056                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7057                 }
7058         }
7059         rdev->rlc.cs_data = ci_cs_data;
7060         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7061         r = sumo_rlc_init(rdev);
7062         if (r) {
7063                 DRM_ERROR("Failed to init rlc BOs!\n");
7064                 return r;
7065         }
7066
7067         /* allocate wb buffer */
7068         r = radeon_wb_init(rdev);
7069         if (r)
7070                 return r;
7071
7072         /* allocate mec buffers */
7073         r = cik_mec_init(rdev);
7074         if (r) {
7075                 DRM_ERROR("Failed to init MEC BOs!\n");
7076                 return r;
7077         }
7078
7079         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7080         if (r) {
7081                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7082                 return r;
7083         }
7084
7085         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7086         if (r) {
7087                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7088                 return r;
7089         }
7090
7091         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7092         if (r) {
7093                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7094                 return r;
7095         }
7096
7097         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7098         if (r) {
7099                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7100                 return r;
7101         }
7102
7103         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7104         if (r) {
7105                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7106                 return r;
7107         }
7108
7109         r = radeon_uvd_resume(rdev);
7110         if (!r) {
7111                 r = uvd_v4_2_resume(rdev);
7112                 if (!r) {
7113                         r = radeon_fence_driver_start_ring(rdev,
7114                                                            R600_RING_TYPE_UVD_INDEX);
7115                         if (r)
7116                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7117                 }
7118         }
7119         if (r)
7120                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7121
7122         /* Enable IRQ */
7123         if (!rdev->irq.installed) {
7124                 r = radeon_irq_kms_init(rdev);
7125                 if (r)
7126                         return r;
7127         }
7128
7129         r = cik_irq_init(rdev);
7130         if (r) {
7131                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7132                 radeon_irq_kms_fini(rdev);
7133                 return r;
7134         }
7135         cik_irq_set(rdev);
7136
7137         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7138         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7139                              CP_RB0_RPTR, CP_RB0_WPTR,
7140                              PACKET3(PACKET3_NOP, 0x3FFF));
7141         if (r)
7142                 return r;
7143
7144         /* set up the compute queues */
7145         /* type-2 packets are deprecated on MEC, use type-3 instead */
7146         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7147         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7148                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7149                              PACKET3(PACKET3_NOP, 0x3FFF));
7150         if (r)
7151                 return r;
7152         ring->me = 1; /* first MEC */
7153         ring->pipe = 0; /* first pipe */
7154         ring->queue = 0; /* first queue */
7155         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7156
7157         /* type-2 packets are deprecated on MEC, use type-3 instead */
7158         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7159         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7160                              CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7161                              PACKET3(PACKET3_NOP, 0x3FFF));
7162         if (r)
7163                 return r;
7164         /* dGPU only have 1 MEC */
7165         ring->me = 1; /* first MEC */
7166         ring->pipe = 0; /* first pipe */
7167         ring->queue = 1; /* second queue */
7168         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7169
7170         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7171         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7172                              SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7173                              SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7174                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7175         if (r)
7176                 return r;
7177
7178         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7179         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7180                              SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7181                              SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7182                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7183         if (r)
7184                 return r;
7185
7186         r = cik_cp_resume(rdev);
7187         if (r)
7188                 return r;
7189
7190         r = cik_sdma_resume(rdev);
7191         if (r)
7192                 return r;
7193
7194         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7195         if (ring->ring_size) {
7196                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7197                                      UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7198                                      RADEON_CP_PACKET2);
7199                 if (!r)
7200                         r = uvd_v1_0_init(rdev);
7201                 if (r)
7202                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7203         }
7204
7205         r = radeon_ib_pool_init(rdev);
7206         if (r) {
7207                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7208                 return r;
7209         }
7210
7211         r = radeon_vm_manager_init(rdev);
7212         if (r) {
7213                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7214                 return r;
7215         }
7216
7217         r = dce6_audio_init(rdev);
7218         if (r)
7219                 return r;
7220
7221         return 0;
7222 }
7223
7224 /**
7225  * cik_resume - resume the asic to a functional state
7226  *
7227  * @rdev: radeon_device pointer
7228  *
7229  * Programs the asic to a functional state (CIK).
7230  * Called at resume.
7231  * Returns 0 for success, error for failure.
7232  */
7233 int cik_resume(struct radeon_device *rdev)
7234 {
7235         int r;
7236
7237         /* post card */
7238         atom_asic_init(rdev->mode_info.atom_context);
7239
7240         /* init golden registers */
7241         cik_init_golden_registers(rdev);
7242
7243         rdev->accel_working = true;
7244         r = cik_startup(rdev);
7245         if (r) {
7246                 DRM_ERROR("cik startup failed on resume\n");
7247                 rdev->accel_working = false;
7248                 return r;
7249         }
7250
7251         return r;
7252
7253 }
7254
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK):
 * audio and VM manager are torn down, the CP/SDMA engines and UVD
 * are disabled, power/clock gating is undone, then interrupts,
 * writeback and finally the GART are shut off.
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* disable the command processors before touching anything else */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	/* undo powergating/clockgating while the hw is still up */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART is disabled last; earlier steps may still touch GPU memory */
	cik_pcie_gart_disable(rdev);
	return 0;
}
7279
7280 /* Plan is to move initialization in that function and use
7281  * helper function so that radeon_device_init pretty much
7282  * do nothing more than calling asic specific function. This
7283  * should also allow to remove a bunch of callback function
7284  * like vram_info.
7285  */
7286 /**
7287  * cik_init - asic specific driver and hw init
7288  *
7289  * @rdev: radeon_device pointer
7290  *
7291  * Setup asic specific driver variables and program the hw
7292  * to a functional state (CIK).
7293  * Called at driver startup.
7294  * Returns 0 for success, errors for failure.
7295  */
7296 int cik_init(struct radeon_device *rdev)
7297 {
7298         struct radeon_ring *ring;
7299         int r;
7300
7301         /* Read BIOS */
7302         if (!radeon_get_bios(rdev)) {
7303                 if (ASIC_IS_AVIVO(rdev))
7304                         return -EINVAL;
7305         }
7306         /* Must be an ATOMBIOS */
7307         if (!rdev->is_atom_bios) {
7308                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7309                 return -EINVAL;
7310         }
7311         r = radeon_atombios_init(rdev);
7312         if (r)
7313                 return r;
7314
7315         /* Post card if necessary */
7316         if (!radeon_card_posted(rdev)) {
7317                 if (!rdev->bios) {
7318                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7319                         return -EINVAL;
7320                 }
7321                 DRM_INFO("GPU not posted. posting now...\n");
7322                 atom_asic_init(rdev->mode_info.atom_context);
7323         }
7324         /* init golden registers */
7325         cik_init_golden_registers(rdev);
7326         /* Initialize scratch registers */
7327         cik_scratch_init(rdev);
7328         /* Initialize surface registers */
7329         radeon_surface_init(rdev);
7330         /* Initialize clocks */
7331         radeon_get_clock_info(rdev->ddev);
7332
7333         /* Fence driver */
7334         r = radeon_fence_driver_init(rdev);
7335         if (r)
7336                 return r;
7337
7338         /* initialize memory controller */
7339         r = cik_mc_init(rdev);
7340         if (r)
7341                 return r;
7342         /* Memory manager */
7343         r = radeon_bo_init(rdev);
7344         if (r)
7345                 return r;
7346
7347         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7348         ring->ring_obj = NULL;
7349         r600_ring_init(rdev, ring, 1024 * 1024);
7350
7351         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7352         ring->ring_obj = NULL;
7353         r600_ring_init(rdev, ring, 1024 * 1024);
7354         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7355         if (r)
7356                 return r;
7357
7358         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7359         ring->ring_obj = NULL;
7360         r600_ring_init(rdev, ring, 1024 * 1024);
7361         r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7362         if (r)
7363                 return r;
7364
7365         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7366         ring->ring_obj = NULL;
7367         r600_ring_init(rdev, ring, 256 * 1024);
7368
7369         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7370         ring->ring_obj = NULL;
7371         r600_ring_init(rdev, ring, 256 * 1024);
7372
7373         r = radeon_uvd_init(rdev);
7374         if (!r) {
7375                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7376                 ring->ring_obj = NULL;
7377                 r600_ring_init(rdev, ring, 4096);
7378         }
7379
7380         rdev->ih.ring_obj = NULL;
7381         r600_ih_ring_init(rdev, 64 * 1024);
7382
7383         r = r600_pcie_gart_init(rdev);
7384         if (r)
7385                 return r;
7386
7387         rdev->accel_working = true;
7388         r = cik_startup(rdev);
7389         if (r) {
7390                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7391                 cik_cp_fini(rdev);
7392                 cik_sdma_fini(rdev);
7393                 cik_irq_fini(rdev);
7394                 sumo_rlc_fini(rdev);
7395                 cik_mec_fini(rdev);
7396                 radeon_wb_fini(rdev);
7397                 radeon_ib_pool_fini(rdev);
7398                 radeon_vm_manager_fini(rdev);
7399                 radeon_irq_kms_fini(rdev);
7400                 cik_pcie_gart_fini(rdev);
7401                 rdev->accel_working = false;
7402         }
7403
7404         /* Don't start up if the MC ucode is missing.
7405          * The default clocks and voltages before the MC ucode
7406          * is loaded are not suffient for advanced operations.
7407          */
7408         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7409                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7410                 return -EINVAL;
7411         }
7412
7413         return 0;
7414 }
7415
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).  Roughly the reverse of cik_init()/
 * cik_startup(): engines first, then the managers built on top of
 * them, and finally memory/BIOS bookkeeping.
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	/* stop the command processors and SDMA engines */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* undo powergating/clockgating */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS copy grabbed at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7449
7450 void dce8_program_fmt(struct drm_encoder *encoder)
7451 {
7452         struct drm_device *dev = encoder->dev;
7453         struct radeon_device *rdev = dev->dev_private;
7454         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
7455         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
7456         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
7457         int bpc = 0;
7458         u32 tmp = 0;
7459         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
7460
7461         if (connector) {
7462                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
7463                 bpc = radeon_get_monitor_bpc(connector);
7464                 dither = radeon_connector->dither;
7465         }
7466
7467         /* LVDS/eDP FMT is set up by atom */
7468         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
7469                 return;
7470
7471         /* not needed for analog */
7472         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
7473             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
7474                 return;
7475
7476         if (bpc == 0)
7477                 return;
7478
7479         switch (bpc) {
7480         case 6:
7481                 if (dither == RADEON_FMT_DITHER_ENABLE)
7482                         /* XXX sort out optimal dither settings */
7483                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7484                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
7485                 else
7486                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
7487                 break;
7488         case 8:
7489                 if (dither == RADEON_FMT_DITHER_ENABLE)
7490                         /* XXX sort out optimal dither settings */
7491                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7492                                 FMT_RGB_RANDOM_ENABLE |
7493                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
7494                 else
7495                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
7496                 break;
7497         case 10:
7498                 if (dither == RADEON_FMT_DITHER_ENABLE)
7499                         /* XXX sort out optimal dither settings */
7500                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
7501                                 FMT_RGB_RANDOM_ENABLE |
7502                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
7503                 else
7504                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
7505                 break;
7506         default:
7507                 /* not needed */
7508                 break;
7509         }
7510
7511         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
7512 }
7513
7514 /* display watermark setup */
7515 /**
7516  * dce8_line_buffer_adjust - Set up the line buffer
7517  *
7518  * @rdev: radeon_device pointer
7519  * @radeon_crtc: the selected display controller
7520  * @mode: the current display mode on the selected display
7521  * controller
7522  *
7523  * Setup up the line buffer allocation for
7524  * the selected display controller (CIK).
7525  * Returns the line buffer size in pixels.
7526  */
7527 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7528                                    struct radeon_crtc *radeon_crtc,
7529                                    struct drm_display_mode *mode)
7530 {
7531         u32 tmp, buffer_alloc, i;
7532         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7533         /*
7534          * Line Buffer Setup
7535          * There are 6 line buffers, one for each display controllers.
7536          * There are 3 partitions per LB. Select the number of partitions
7537          * to enable based on the display width.  For display widths larger
7538          * than 4096, you need use to use 2 display controllers and combine
7539          * them using the stereo blender.
7540          */
7541         if (radeon_crtc->base.enabled && mode) {
7542                 if (mode->crtc_hdisplay < 1920) {
7543                         tmp = 1;
7544                         buffer_alloc = 2;
7545                 } else if (mode->crtc_hdisplay < 2560) {
7546                         tmp = 2;
7547                         buffer_alloc = 2;
7548                 } else if (mode->crtc_hdisplay < 4096) {
7549                         tmp = 0;
7550                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7551                 } else {
7552                         DRM_DEBUG_KMS("Mode too big for LB!\n");
7553                         tmp = 0;
7554                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7555                 }
7556         } else {
7557                 tmp = 1;
7558                 buffer_alloc = 0;
7559         }
7560
7561         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7562                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7563
7564         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7565                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7566         for (i = 0; i < rdev->usec_timeout; i++) {
7567                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7568                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
7569                         break;
7570                 udelay(1);
7571         }
7572
7573         if (radeon_crtc->base.enabled && mode) {
7574                 switch (tmp) {
7575                 case 0:
7576                 default:
7577                         return 4096 * 2;
7578                 case 1:
7579                         return 1920 * 2;
7580                 case 2:
7581                         return 2560 * 2;
7582                 }
7583         }
7584
7585         /* controller not enabled, so no lb used */
7586         return 0;
7587 }
7588
7589 /**
7590  * cik_get_number_of_dram_channels - get the number of dram channels
7591  *
7592  * @rdev: radeon_device pointer
7593  *
7594  * Look up the number of video ram channels (CIK).
7595  * Used for display watermark bandwidth calculations
7596  * Returns the number of dram channels
7597  */
7598 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7599 {
7600         u32 tmp = RREG32(MC_SHARED_CHMAP);
7601
7602         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7603         case 0:
7604         default:
7605                 return 1;
7606         case 1:
7607                 return 2;
7608         case 2:
7609                 return 4;
7610         case 3:
7611                 return 8;
7612         case 4:
7613                 return 3;
7614         case 5:
7615                 return 6;
7616         case 6:
7617                 return 10;
7618         case 7:
7619                 return 12;
7620         case 8:
7621                 return 16;
7622         }
7623 }
7624
/* Inputs to the DCE8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
7640
7641 /**
7642  * dce8_dram_bandwidth - get the dram bandwidth
7643  *
7644  * @wm: watermark calculation data
7645  *
7646  * Calculate the raw dram bandwidth (CIK).
7647  * Used for display watermark bandwidth calculations
7648  * Returns the dram bandwidth in MBytes/s
7649  */
7650 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7651 {
7652         /* Calculate raw DRAM Bandwidth */
7653         fixed20_12 dram_efficiency; /* 0.7 */
7654         fixed20_12 yclk, dram_channels, bandwidth;
7655         fixed20_12 a;
7656
7657         a.full = dfixed_const(1000);
7658         yclk.full = dfixed_const(wm->yclk);
7659         yclk.full = dfixed_div(yclk, a);
7660         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7661         a.full = dfixed_const(10);
7662         dram_efficiency.full = dfixed_const(7);
7663         dram_efficiency.full = dfixed_div(dram_efficiency, a);
7664         bandwidth.full = dfixed_mul(dram_channels, yclk);
7665         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7666
7667         return dfixed_trunc(bandwidth);
7668 }
7669
7670 /**
7671  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7672  *
7673  * @wm: watermark calculation data
7674  *
7675  * Calculate the dram bandwidth used for display (CIK).
7676  * Used for display watermark bandwidth calculations
7677  * Returns the dram bandwidth for display in MBytes/s
7678  */
7679 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7680 {
7681         /* Calculate DRAM Bandwidth and the part allocated to display. */
7682         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7683         fixed20_12 yclk, dram_channels, bandwidth;
7684         fixed20_12 a;
7685
7686         a.full = dfixed_const(1000);
7687         yclk.full = dfixed_const(wm->yclk);
7688         yclk.full = dfixed_div(yclk, a);
7689         dram_channels.full = dfixed_const(wm->dram_channels * 4);
7690         a.full = dfixed_const(10);
7691         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
7692         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7693         bandwidth.full = dfixed_mul(dram_channels, yclk);
7694         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7695
7696         return dfixed_trunc(bandwidth);
7697 }
7698
7699 /**
7700  * dce8_data_return_bandwidth - get the data return bandwidth
7701  *
7702  * @wm: watermark calculation data
7703  *
7704  * Calculate the data return bandwidth used for display (CIK).
7705  * Used for display watermark bandwidth calculations
7706  * Returns the data return bandwidth in MBytes/s
7707  */
7708 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7709 {
7710         /* Calculate the display Data return Bandwidth */
7711         fixed20_12 return_efficiency; /* 0.8 */
7712         fixed20_12 sclk, bandwidth;
7713         fixed20_12 a;
7714
7715         a.full = dfixed_const(1000);
7716         sclk.full = dfixed_const(wm->sclk);
7717         sclk.full = dfixed_div(sclk, a);
7718         a.full = dfixed_const(10);
7719         return_efficiency.full = dfixed_const(8);
7720         return_efficiency.full = dfixed_div(return_efficiency, a);
7721         a.full = dfixed_const(32);
7722         bandwidth.full = dfixed_mul(a, sclk);
7723         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7724
7725         return dfixed_trunc(bandwidth);
7726 }
7727
7728 /**
7729  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7730  *
7731  * @wm: watermark calculation data
7732  *
7733  * Calculate the dmif bandwidth used for display (CIK).
7734  * Used for display watermark bandwidth calculations
7735  * Returns the dmif bandwidth in MBytes/s
7736  */
7737 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7738 {
7739         /* Calculate the DMIF Request Bandwidth */
7740         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7741         fixed20_12 disp_clk, bandwidth;
7742         fixed20_12 a, b;
7743
7744         a.full = dfixed_const(1000);
7745         disp_clk.full = dfixed_const(wm->disp_clk);
7746         disp_clk.full = dfixed_div(disp_clk, a);
7747         a.full = dfixed_const(32);
7748         b.full = dfixed_mul(a, disp_clk);
7749
7750         a.full = dfixed_const(10);
7751         disp_clk_request_efficiency.full = dfixed_const(8);
7752         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7753
7754         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7755
7756         return dfixed_trunc(bandwidth);
7757 }
7758
7759 /**
7760  * dce8_available_bandwidth - get the min available bandwidth
7761  *
7762  * @wm: watermark calculation data
7763  *
7764  * Calculate the min available bandwidth used for display (CIK).
7765  * Used for display watermark bandwidth calculations
7766  * Returns the min available bandwidth in MBytes/s
7767  */
7768 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7769 {
7770         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
7771         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7772         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7773         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7774
7775         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7776 }
7777
7778 /**
7779  * dce8_average_bandwidth - get the average available bandwidth
7780  *
7781  * @wm: watermark calculation data
7782  *
7783  * Calculate the average available bandwidth used for display (CIK).
7784  * Used for display watermark bandwidth calculations
7785  * Returns the average available bandwidth in MBytes/s
7786  */
7787 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7788 {
7789         /* Calculate the display mode Average Bandwidth
7790          * DisplayMode should contain the source and destination dimensions,
7791          * timing, etc.
7792          */
7793         fixed20_12 bpp;
7794         fixed20_12 line_time;
7795         fixed20_12 src_width;
7796         fixed20_12 bandwidth;
7797         fixed20_12 a;
7798
7799         a.full = dfixed_const(1000);
7800         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7801         line_time.full = dfixed_div(line_time, a);
7802         bpp.full = dfixed_const(wm->bytes_per_pixel);
7803         src_width.full = dfixed_const(wm->src_width);
7804         bandwidth.full = dfixed_mul(src_width, bpp);
7805         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7806         bandwidth.full = dfixed_div(bandwidth, line_time);
7807
7808         return dfixed_trunc(bandwidth);
7809 }
7810
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time to return a 512-byte x8 chunk at the available bandwidth */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* delay contributed by the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads -> nothing to hide */
	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vertical taps, or interlace at >= 2x
	 * scale need more source lines per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* also cap at (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one line's worth of source pixels at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills within the active period, latency alone bounds
	 * the watermark; otherwise add the overshoot */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
7882
7883 /**
7884  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7885  * average and available dram bandwidth
7886  *
7887  * @wm: watermark calculation data
7888  *
7889  * Check if the display average bandwidth fits in the display
7890  * dram bandwidth (CIK).
7891  * Used for display watermark bandwidth calculations
7892  * Returns true if the display fits, false if not.
7893  */
7894 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7895 {
7896         if (dce8_average_bandwidth(wm) <=
7897             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7898                 return true;
7899         else
7900                 return false;
7901 }
7902
7903 /**
7904  * dce8_average_bandwidth_vs_available_bandwidth - check
7905  * average and available bandwidth
7906  *
7907  * @wm: watermark calculation data
7908  *
7909  * Check if the display average bandwidth fits in the display
7910  * available bandwidth (CIK).
7911  * Used for display watermark bandwidth calculations
7912  * Returns true if the display fits, false if not.
7913  */
7914 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7915 {
7916         if (dce8_average_bandwidth(wm) <=
7917             (dce8_available_bandwidth(wm) / wm->num_heads))
7918                 return true;
7919         else
7920                 return false;
7921 }
7922
7923 /**
7924  * dce8_check_latency_hiding - check latency hiding
7925  *
7926  * @wm: watermark calculation data
7927  *
7928  * Check latency hiding (CIK).
7929  * Used for display watermark bandwidth calculations
7930  * Returns true if the display fits, false if not.
7931  */
7932 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7933 {
7934         u32 lb_partitions = wm->lb_size / wm->src_width;
7935         u32 line_time = wm->active_time + wm->blank_time;
7936         u32 latency_tolerant_lines;
7937         u32 latency_hiding;
7938         fixed20_12 a;
7939
7940         a.full = dfixed_const(1);
7941         if (wm->vsc.full > a.full)
7942                 latency_tolerant_lines = 1;
7943         else {
7944                 if (lb_partitions <= (wm->vtaps + 1))
7945                         latency_tolerant_lines = 1;
7946                 else
7947                         latency_tolerant_lines = 2;
7948         }
7949
7950         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7951
7952         if (dce8_latency_watermark(wm) <= latency_hiding)
7953                 return true;
7954         else
7955                 return false;
7956 }
7957
7958 /**
7959  * dce8_program_watermarks - program display watermarks
7960  *
7961  * @rdev: radeon_device pointer
7962  * @radeon_crtc: the selected display controller
7963  * @lb_size: line buffer size
7964  * @num_heads: number of display controllers in use
7965  *
7966  * Calculate and program the display watermarks for the
7967  * selected display controller (CIK).
7968  */
7969 static void dce8_program_watermarks(struct radeon_device *rdev,
7970                                     struct radeon_crtc *radeon_crtc,
7971                                     u32 lb_size, u32 num_heads)
7972 {
7973         struct drm_display_mode *mode = &radeon_crtc->base.mode;
7974         struct dce8_wm_params wm_low, wm_high;
7975         u32 pixel_period;
7976         u32 line_time = 0;
7977         u32 latency_watermark_a = 0, latency_watermark_b = 0;
7978         u32 tmp, wm_mask;
7979
7980         if (radeon_crtc->base.enabled && num_heads && mode) {
7981                 pixel_period = 1000000 / (u32)mode->clock;
7982                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7983
7984                 /* watermark for high clocks */
7985                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7986                     rdev->pm.dpm_enabled) {
7987                         wm_high.yclk =
7988                                 radeon_dpm_get_mclk(rdev, false) * 10;
7989                         wm_high.sclk =
7990                                 radeon_dpm_get_sclk(rdev, false) * 10;
7991                 } else {
7992                         wm_high.yclk = rdev->pm.current_mclk * 10;
7993                         wm_high.sclk = rdev->pm.current_sclk * 10;
7994                 }
7995
7996                 wm_high.disp_clk = mode->clock;
7997                 wm_high.src_width = mode->crtc_hdisplay;
7998                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7999                 wm_high.blank_time = line_time - wm_high.active_time;
8000                 wm_high.interlaced = false;
8001                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8002                         wm_high.interlaced = true;
8003                 wm_high.vsc = radeon_crtc->vsc;
8004                 wm_high.vtaps = 1;
8005                 if (radeon_crtc->rmx_type != RMX_OFF)
8006                         wm_high.vtaps = 2;
8007                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8008                 wm_high.lb_size = lb_size;
8009                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8010                 wm_high.num_heads = num_heads;
8011
8012                 /* set for high clocks */
8013                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8014
8015                 /* possibly force display priority to high */
8016                 /* should really do this at mode validation time... */
8017                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8018                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8019                     !dce8_check_latency_hiding(&wm_high) ||
8020                     (rdev->disp_priority == 2)) {
8021                         DRM_DEBUG_KMS("force priority to high\n");
8022                 }
8023
8024                 /* watermark for low clocks */
8025                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8026                     rdev->pm.dpm_enabled) {
8027                         wm_low.yclk =
8028                                 radeon_dpm_get_mclk(rdev, true) * 10;
8029                         wm_low.sclk =
8030                                 radeon_dpm_get_sclk(rdev, true) * 10;
8031                 } else {
8032                         wm_low.yclk = rdev->pm.current_mclk * 10;
8033                         wm_low.sclk = rdev->pm.current_sclk * 10;
8034                 }
8035
8036                 wm_low.disp_clk = mode->clock;
8037                 wm_low.src_width = mode->crtc_hdisplay;
8038                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8039                 wm_low.blank_time = line_time - wm_low.active_time;
8040                 wm_low.interlaced = false;
8041                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8042                         wm_low.interlaced = true;
8043                 wm_low.vsc = radeon_crtc->vsc;
8044                 wm_low.vtaps = 1;
8045                 if (radeon_crtc->rmx_type != RMX_OFF)
8046                         wm_low.vtaps = 2;
8047                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8048                 wm_low.lb_size = lb_size;
8049                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8050                 wm_low.num_heads = num_heads;
8051
8052                 /* set for low clocks */
8053                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8054
8055                 /* possibly force display priority to high */
8056                 /* should really do this at mode validation time... */
8057                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8058                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8059                     !dce8_check_latency_hiding(&wm_low) ||
8060                     (rdev->disp_priority == 2)) {
8061                         DRM_DEBUG_KMS("force priority to high\n");
8062                 }
8063         }
8064
8065         /* select wm A */
8066         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8067         tmp = wm_mask;
8068         tmp &= ~LATENCY_WATERMARK_MASK(3);
8069         tmp |= LATENCY_WATERMARK_MASK(1);
8070         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8071         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8072                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8073                 LATENCY_HIGH_WATERMARK(line_time)));
8074         /* select wm B */
8075         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8076         tmp &= ~LATENCY_WATERMARK_MASK(3);
8077         tmp |= LATENCY_WATERMARK_MASK(2);
8078         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8079         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8080                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8081                 LATENCY_HIGH_WATERMARK(line_time)));
8082         /* restore original selection */
8083         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8084
8085         /* save values for DPM */
8086         radeon_crtc->line_time = line_time;
8087         radeon_crtc->wm_high = latency_watermark_a;
8088         radeon_crtc->wm_low = latency_watermark_b;
8089 }
8090
8091 /**
8092  * dce8_bandwidth_update - program display watermarks
8093  *
8094  * @rdev: radeon_device pointer
8095  *
8096  * Calculate and program the display watermarks and line
8097  * buffer allocation (CIK).
8098  */
8099 void dce8_bandwidth_update(struct radeon_device *rdev)
8100 {
8101         struct drm_display_mode *mode = NULL;
8102         u32 num_heads = 0, lb_size;
8103         int i;
8104
8105         radeon_update_display_priority(rdev);
8106
8107         for (i = 0; i < rdev->num_crtc; i++) {
8108                 if (rdev->mode_info.crtcs[i]->base.enabled)
8109                         num_heads++;
8110         }
8111         for (i = 0; i < rdev->num_crtc; i++) {
8112                 mode = &rdev->mode_info.crtcs[i]->base.mode;
8113                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8114                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8115         }
8116 }
8117
8118 /**
8119  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8120  *
8121  * @rdev: radeon_device pointer
8122  *
8123  * Fetches a GPU clock counter snapshot (SI).
8124  * Returns the 64 bit clock counter snapshot.
8125  */
8126 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8127 {
8128         uint64_t clock;
8129
8130         mutex_lock(&rdev->gpu_clock_mutex);
8131         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8132         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8133                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8134         mutex_unlock(&rdev->gpu_clock_mutex);
8135         return clock;
8136 }
8137
8138 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8139                               u32 cntl_reg, u32 status_reg)
8140 {
8141         int r, i;
8142         struct atom_clock_dividers dividers;
8143         uint32_t tmp;
8144
8145         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8146                                            clock, false, &dividers);
8147         if (r)
8148                 return r;
8149
8150         tmp = RREG32_SMC(cntl_reg);
8151         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8152         tmp |= dividers.post_divider;
8153         WREG32_SMC(cntl_reg, tmp);
8154
8155         for (i = 0; i < 100; i++) {
8156                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8157                         break;
8158                 mdelay(10);
8159         }
8160         if (i == 100)
8161                 return -ETIMEDOUT;
8162
8163         return 0;
8164 }
8165
8166 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8167 {
8168         int r = 0;
8169
8170         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8171         if (r)
8172                 return r;
8173
8174         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8175         return r;
8176 }
8177
/**
 * cik_pcie_gen3_enable - attempt to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * Bump the link to gen2/gen3 when the platform advertises support and
 * the user has not disabled it via the radeon.pcie_gen2 module
 * parameter.  For gen3, the link equalization is redone first if the
 * link is not already running at the gen3 data rate.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no discrete PCIE link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* query which link speeds the platform supports */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is available */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* data rate field: 1 = gen2, 2 = gen3 (see checks below) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIE capability blocks on both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL so the HAWD bits can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* set hw-autonomous-width-disable on both sides */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the widest detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot LNKCTL/LNKCTL2 on both sides */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then ask the LC to redo EQ */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bit on both sides */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved (1 << 4) | (7 << 9)
				 * bits — NOTE(review): magic masks; consider named
				 * PCI_EXP_LNKCTL2_* constants and verify which bits
				 * these are meant to cover */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce for the next attempt */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into the GPU's LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change... */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* ...and wait for the controller to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
8334
/**
 * cik_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 inactivity timers, allows the PCIE PLLs to power
 * down in L1, and (when the root port supports clock PM) switches the
 * clkreq-related clock sources.  Skipped entirely when disabled via
 * the radeon.aspm module parameter or on IGP/non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all currently hard-wired to "enabled" */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program the L0s/L1 inactivity timers */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its timer and re-allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow PLL power-down in the OFF/TXS2 states on PIF0 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			/* same programming for PIF1 */
			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* clkreq usable only if the upstream bridge
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* re-parent the monitor/aux clocks — NOTE(review):
				 * presumably so the ref clock can gate with clkreq;
				 * confirm against the SMC clock documentation */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write the timer settings computed above as-is */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				/* lane-reversed link at max N_FTS: turn L0s back off */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}