1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
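/* the *_UCODE_SIZE values above are dword counts; cik_init_microcode() below
 * checks each requested firmware file against SIZE * 4 bytes.
 */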
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
79static void cik_rlc_stop(struct radeon_device *rdev);
80
81/*
 82 * Indirect register accessors
 83 */
84u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85{
86 u32 r;
87
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
91 return r;
92}
93
94void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95{
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
98 WREG32(PCIE_DATA, v);
99 (void)RREG32(PCIE_DATA);
100}
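/*
 * Illustrative usage sketch (not part of the driver): the indirect accessors
 * above are typically used for read-modify-write sequences on PCIE port
 * registers.  PCIE_EXAMPLE_REG and EXAMPLE_EN are placeholder names for
 * illustration, not real cikd.h definitions.
 *
 *	u32 tmp = cik_pciep_rreg(rdev, PCIE_EXAMPLE_REG);
 *	tmp |= EXAMPLE_EN;
 *	cik_pciep_wreg(rdev, PCIE_EXAMPLE_REG, tmp);
 */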
101
102/**
103 * cik_get_xclk - get the xclk
104 *
105 * @rdev: radeon_device pointer
106 *
107 * Returns the reference clock used by the gfx engine
108 * (CIK).
109 */
110u32 cik_get_xclk(struct radeon_device *rdev)
111{
112 u32 reference_clock = rdev->clock.spll.reference_freq;
113
114 if (rdev->flags & RADEON_IS_IGP) {
115 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
116 return reference_clock / 2;
117 } else {
118 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
119 return reference_clock / 4;
120 }
121 return reference_clock;
122}
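/*
 * Worked example (illustrative): with a 100 MHz reference clock this helper
 * returns the full 100 MHz by default, half that (50 MHz) on an IGP when
 * GPU_COUNTER_CLK is set, and a quarter (25 MHz) on a dGPU when
 * XTALIN_DIVIDE is set.
 */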
123
124/**
125 * cik_mm_rdoorbell - read a doorbell dword
126 *
127 * @rdev: radeon_device pointer
128 * @offset: byte offset into the aperture
129 *
130 * Returns the value in the doorbell aperture at the
131 * requested offset (CIK).
132 */
133u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
134{
135 if (offset < rdev->doorbell.size) {
136 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
137 } else {
138 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
139 return 0;
140 }
141}
142
143/**
144 * cik_mm_wdoorbell - write a doorbell dword
145 *
146 * @rdev: radeon_device pointer
147 * @offset: byte offset into the aperture
148 * @v: value to write
149 *
150 * Writes @v to the doorbell aperture at the
151 * requested offset (CIK).
152 */
153void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
154{
155 if (offset < rdev->doorbell.size) {
156 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
157 } else {
158 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
159 }
160}
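/*
 * Illustrative sketch (the field names are assumptions for illustration,
 * not the driver's actual structures): a ring backend would typically kick
 * the hardware by writing its new write pointer to the ring's doorbell
 * offset, e.g.
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */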
161
162#define BONAIRE_IO_MC_REGS_SIZE 36
163
164static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
165{
166 {0x00000070, 0x04400000},
167 {0x00000071, 0x80c01803},
168 {0x00000072, 0x00004004},
169 {0x00000073, 0x00000100},
170 {0x00000074, 0x00ff0000},
171 {0x00000075, 0x34000000},
172 {0x00000076, 0x08000014},
173 {0x00000077, 0x00cc08ec},
174 {0x00000078, 0x00000400},
175 {0x00000079, 0x00000000},
176 {0x0000007a, 0x04090000},
177 {0x0000007c, 0x00000000},
178 {0x0000007e, 0x4408a8e8},
179 {0x0000007f, 0x00000304},
180 {0x00000080, 0x00000000},
181 {0x00000082, 0x00000001},
182 {0x00000083, 0x00000002},
183 {0x00000084, 0xf3e4f400},
184 {0x00000085, 0x052024e3},
185 {0x00000087, 0x00000000},
186 {0x00000088, 0x01000000},
187 {0x0000008a, 0x1c0a0000},
188 {0x0000008b, 0xff010000},
189 {0x0000008d, 0xffffefff},
190 {0x0000008e, 0xfff3efff},
191 {0x0000008f, 0xfff3efbf},
192 {0x00000092, 0xf7ffffff},
193 {0x00000093, 0xffffff7f},
194 {0x00000095, 0x00101101},
195 {0x00000096, 0x00000fff},
196 {0x00000097, 0x00116fff},
197 {0x00000098, 0x60010000},
198 {0x00000099, 0x10010000},
199 {0x0000009a, 0x00006000},
200 {0x0000009b, 0x00001000},
201 {0x0000009f, 0x00b48000}
202};
203
204/**
205 * cik_srbm_select - select specific register instances
206 *
207 * @rdev: radeon_device pointer
208 * @me: selected ME (micro engine)
209 * @pipe: pipe
210 * @queue: queue
211 * @vmid: VMID
212 *
 213 * Switches the currently active register instances. Some
214 * registers are instanced per VMID, others are instanced per
215 * me/pipe/queue combination.
216 */
217static void cik_srbm_select(struct radeon_device *rdev,
218 u32 me, u32 pipe, u32 queue, u32 vmid)
219{
220 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
221 MEID(me & 0x3) |
222 VMID(vmid & 0xf) |
223 QUEUEID(queue & 0x7));
224 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
225}
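/*
 * Illustrative usage sketch: callers select a specific me/pipe/queue (or
 * VMID), program the instanced registers, then restore the default
 * instance.  Access to SRBM_GFX_CNTL is assumed to be serialized by the
 * caller.
 *
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */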
226
227/* ucode loading */
228/**
229 * ci_mc_load_microcode - load MC ucode into the hw
230 *
231 * @rdev: radeon_device pointer
232 *
233 * Load the GDDR MC ucode into the hw (CIK).
234 * Returns 0 on success, error on failure.
235 */
236static int ci_mc_load_microcode(struct radeon_device *rdev)
237{
238 const __be32 *fw_data;
239 u32 running, blackout = 0;
240 u32 *io_mc_regs;
241 int i, ucode_size, regs_size;
242
243 if (!rdev->mc_fw)
244 return -EINVAL;
245
246 switch (rdev->family) {
247 case CHIP_BONAIRE:
248 default:
249 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
250 ucode_size = CIK_MC_UCODE_SIZE;
251 regs_size = BONAIRE_IO_MC_REGS_SIZE;
252 break;
253 }
254
255 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
256
257 if (running == 0) {
258 if (running) {
259 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
260 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
261 }
262
263 /* reset the engine and set to writable */
264 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
265 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
266
267 /* load mc io regs */
268 for (i = 0; i < regs_size; i++) {
269 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
270 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
271 }
272 /* load the MC ucode */
273 fw_data = (const __be32 *)rdev->mc_fw->data;
274 for (i = 0; i < ucode_size; i++)
275 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
276
277 /* put the engine back into the active state */
278 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
279 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
280 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
281
282 /* wait for training to complete */
283 for (i = 0; i < rdev->usec_timeout; i++) {
284 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
285 break;
286 udelay(1);
287 }
288 for (i = 0; i < rdev->usec_timeout; i++) {
289 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
290 break;
291 udelay(1);
292 }
293
294 if (running)
295 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
296 }
297
298 return 0;
299}
300
301/**
302 * cik_init_microcode - load ucode images from disk
303 *
304 * @rdev: radeon_device pointer
305 *
306 * Use the firmware interface to load the ucode images into
307 * the driver (not loaded into hw).
308 * Returns 0 on success, error on failure.
309 */
310static int cik_init_microcode(struct radeon_device *rdev)
311{
312 struct platform_device *pdev;
313 const char *chip_name;
314 size_t pfp_req_size, me_req_size, ce_req_size,
315 mec_req_size, rlc_req_size, mc_req_size,
316 sdma_req_size;
317 char fw_name[30];
318 int err;
319
320 DRM_DEBUG("\n");
321
322 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
323 err = IS_ERR(pdev);
324 if (err) {
325 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
326 return -EINVAL;
327 }
328
329 switch (rdev->family) {
330 case CHIP_BONAIRE:
331 chip_name = "BONAIRE";
332 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
333 me_req_size = CIK_ME_UCODE_SIZE * 4;
334 ce_req_size = CIK_CE_UCODE_SIZE * 4;
335 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
336 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
337 mc_req_size = CIK_MC_UCODE_SIZE * 4;
338 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
339 break;
340 case CHIP_KAVERI:
341 chip_name = "KAVERI";
342 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
343 me_req_size = CIK_ME_UCODE_SIZE * 4;
344 ce_req_size = CIK_CE_UCODE_SIZE * 4;
345 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
346 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
347 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
348 break;
349 case CHIP_KABINI:
350 chip_name = "KABINI";
351 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
352 me_req_size = CIK_ME_UCODE_SIZE * 4;
353 ce_req_size = CIK_CE_UCODE_SIZE * 4;
354 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
355 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
356 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
357 break;
358 default: BUG();
359 }
360
361 DRM_INFO("Loading %s Microcode\n", chip_name);
362
363 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
364 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
365 if (err)
366 goto out;
367 if (rdev->pfp_fw->size != pfp_req_size) {
368 printk(KERN_ERR
369 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
370 rdev->pfp_fw->size, fw_name);
371 err = -EINVAL;
372 goto out;
373 }
374
375 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
376 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
377 if (err)
378 goto out;
379 if (rdev->me_fw->size != me_req_size) {
380 printk(KERN_ERR
381 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
382 rdev->me_fw->size, fw_name);
383 err = -EINVAL;
384 }
385
386 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
387 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
388 if (err)
389 goto out;
390 if (rdev->ce_fw->size != ce_req_size) {
391 printk(KERN_ERR
392 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
393 rdev->ce_fw->size, fw_name);
394 err = -EINVAL;
395 }
396
397 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
398 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
399 if (err)
400 goto out;
401 if (rdev->mec_fw->size != mec_req_size) {
402 printk(KERN_ERR
403 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
404 rdev->mec_fw->size, fw_name);
405 err = -EINVAL;
406 }
407
408 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
409 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
410 if (err)
411 goto out;
412 if (rdev->rlc_fw->size != rlc_req_size) {
413 printk(KERN_ERR
414 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
415 rdev->rlc_fw->size, fw_name);
416 err = -EINVAL;
417 }
418
419 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
420 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
421 if (err)
422 goto out;
423 if (rdev->sdma_fw->size != sdma_req_size) {
424 printk(KERN_ERR
425 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
426 rdev->sdma_fw->size, fw_name);
427 err = -EINVAL;
428 }
429
430 /* No MC ucode on APUs */
431 if (!(rdev->flags & RADEON_IS_IGP)) {
432 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
433 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
434 if (err)
435 goto out;
436 if (rdev->mc_fw->size != mc_req_size) {
437 printk(KERN_ERR
438 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
439 rdev->mc_fw->size, fw_name);
440 err = -EINVAL;
441 }
442 }
443
444out:
445 platform_device_unregister(pdev);
446
447 if (err) {
448 if (err != -EINVAL)
449 printk(KERN_ERR
450 "cik_cp: Failed to load firmware \"%s\"\n",
451 fw_name);
452 release_firmware(rdev->pfp_fw);
453 rdev->pfp_fw = NULL;
454 release_firmware(rdev->me_fw);
455 rdev->me_fw = NULL;
456 release_firmware(rdev->ce_fw);
457 rdev->ce_fw = NULL;
	release_firmware(rdev->mec_fw);
	rdev->mec_fw = NULL;
	release_firmware(rdev->sdma_fw);
	rdev->sdma_fw = NULL;
458 release_firmware(rdev->rlc_fw);
459 rdev->rlc_fw = NULL;
460 release_firmware(rdev->mc_fw);
461 rdev->mc_fw = NULL;
462 }
463 return err;
464}
465
466/*
467 * Core functions
468 */
469/**
470 * cik_tiling_mode_table_init - init the hw tiling table
471 *
472 * @rdev: radeon_device pointer
473 *
474 * Starting with SI, the tiling setup is done globally in a
475 * set of 32 tiling modes. Rather than selecting each set of
476 * parameters per surface as on older asics, we just select
477 * which index in the tiling table we want to use, and the
478 * surface uses those parameters (CIK).
479 */
480static void cik_tiling_mode_table_init(struct radeon_device *rdev)
481{
482 const u32 num_tile_mode_states = 32;
483 const u32 num_secondary_tile_mode_states = 16;
484 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
485 u32 num_pipe_configs;
486 u32 num_rbs = rdev->config.cik.max_backends_per_se *
487 rdev->config.cik.max_shader_engines;
488
489 switch (rdev->config.cik.mem_row_size_in_kb) {
490 case 1:
491 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
492 break;
493 case 2:
494 default:
495 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
496 break;
497 case 4:
498 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
499 break;
500 }
501
502 num_pipe_configs = rdev->config.cik.max_tile_pipes;
503 if (num_pipe_configs > 8)
504 num_pipe_configs = 8; /* ??? */
505
506 if (num_pipe_configs == 8) {
507 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
508 switch (reg_offset) {
509 case 0:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
514 break;
515 case 1:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
520 break;
521 case 2:
522 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
526 break;
527 case 3:
528 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
532 break;
533 case 4:
534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
537 TILE_SPLIT(split_equal_to_row_size));
538 break;
539 case 5:
540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
542 break;
543 case 6:
544 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
548 break;
549 case 7:
550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
551 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
553 TILE_SPLIT(split_equal_to_row_size));
554 break;
555 case 8:
556 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
558 break;
559 case 9:
560 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
562 break;
563 case 10:
564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
565 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
568 break;
569 case 11:
570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
572 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
574 break;
575 case 12:
576 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
580 break;
581 case 13:
582 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
584 break;
585 case 14:
586 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
590 break;
591 case 16:
592 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
594 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
596 break;
597 case 17:
598 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
599 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
602 break;
603 case 27:
604 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
606 break;
607 case 28:
608 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
609 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
612 break;
613 case 29:
614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
616 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
618 break;
619 case 30:
620 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
624 break;
625 default:
626 gb_tile_moden = 0;
627 break;
628 }
629 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
630 }
631 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
632 switch (reg_offset) {
633 case 0:
634 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
637 NUM_BANKS(ADDR_SURF_16_BANK));
638 break;
639 case 1:
640 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
643 NUM_BANKS(ADDR_SURF_16_BANK));
644 break;
645 case 2:
646 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
649 NUM_BANKS(ADDR_SURF_16_BANK));
650 break;
651 case 3:
652 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
655 NUM_BANKS(ADDR_SURF_16_BANK));
656 break;
657 case 4:
658 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
661 NUM_BANKS(ADDR_SURF_8_BANK));
662 break;
663 case 5:
664 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
667 NUM_BANKS(ADDR_SURF_4_BANK));
668 break;
669 case 6:
670 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
673 NUM_BANKS(ADDR_SURF_2_BANK));
674 break;
675 case 8:
676 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
679 NUM_BANKS(ADDR_SURF_16_BANK));
680 break;
681 case 9:
682 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
685 NUM_BANKS(ADDR_SURF_16_BANK));
686 break;
687 case 10:
688 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
691 NUM_BANKS(ADDR_SURF_16_BANK));
692 break;
693 case 11:
694 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
697 NUM_BANKS(ADDR_SURF_16_BANK));
698 break;
699 case 12:
700 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
703 NUM_BANKS(ADDR_SURF_8_BANK));
704 break;
705 case 13:
706 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
709 NUM_BANKS(ADDR_SURF_4_BANK));
710 break;
711 case 14:
712 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
715 NUM_BANKS(ADDR_SURF_2_BANK));
716 break;
717 default:
718 gb_tile_moden = 0;
719 break;
720 }
721 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
722 }
723 } else if (num_pipe_configs == 4) {
724 if (num_rbs == 4) {
725 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
726 switch (reg_offset) {
727 case 0:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
732 break;
733 case 1:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
738 break;
739 case 2:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
743 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
744 break;
745 case 3:
746 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
747 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
750 break;
751 case 4:
752 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
755 TILE_SPLIT(split_equal_to_row_size));
756 break;
757 case 5:
758 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
760 break;
761 case 6:
762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
765 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
766 break;
767 case 7:
768 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
769 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
771 TILE_SPLIT(split_equal_to_row_size));
772 break;
773 case 8:
774 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
775 PIPE_CONFIG(ADDR_SURF_P4_16x16));
776 break;
777 case 9:
778 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
779 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
780 break;
781 case 10:
782 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
783 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
784 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
786 break;
787 case 11:
788 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
789 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
790 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
792 break;
793 case 12:
794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
798 break;
799 case 13:
800 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
801 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
802 break;
803 case 14:
804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
805 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
808 break;
809 case 16:
810 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
811 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
814 break;
815 case 17:
816 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
820 break;
821 case 27:
822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
824 break;
825 case 28:
826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
827 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
830 break;
831 case 29:
832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
833 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
834 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
836 break;
837 case 30:
838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
842 break;
843 default:
844 gb_tile_moden = 0;
845 break;
846 }
847 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
848 }
849 } else if (num_rbs < 4) {
850 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
851 switch (reg_offset) {
852 case 0:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
857 break;
858 case 1:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
863 break;
864 case 2:
865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
867 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
869 break;
870 case 3:
871 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
872 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
873 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
875 break;
876 case 4:
877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
879 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
880 TILE_SPLIT(split_equal_to_row_size));
881 break;
882 case 5:
883 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
885 break;
886 case 6:
887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
888 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
889 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
890 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
891 break;
892 case 7:
893 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
895 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
896 TILE_SPLIT(split_equal_to_row_size));
897 break;
898 case 8:
899 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
900 PIPE_CONFIG(ADDR_SURF_P4_8x16));
901 break;
902 case 9:
903 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
904 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
905 break;
906 case 10:
907 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
908 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
909 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
911 break;
912 case 11:
913 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
914 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
915 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
917 break;
918 case 12:
919 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
920 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
921 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
923 break;
924 case 13:
925 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
927 break;
928 case 14:
929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
931 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
933 break;
934 case 16:
935 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
936 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
937 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
939 break;
940 case 17:
941 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
942 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
943 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
945 break;
946 case 27:
947 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
948 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
949 break;
950 case 28:
951 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
955 break;
956 case 29:
957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
959 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
961 break;
962 case 30:
963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
965 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
967 break;
968 default:
969 gb_tile_moden = 0;
970 break;
971 }
972 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
973 }
974 }
975 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
976 switch (reg_offset) {
977 case 0:
978 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
981 NUM_BANKS(ADDR_SURF_16_BANK));
982 break;
983 case 1:
984 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
987 NUM_BANKS(ADDR_SURF_16_BANK));
988 break;
989 case 2:
990 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
993 NUM_BANKS(ADDR_SURF_16_BANK));
994 break;
995 case 3:
996 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
999 NUM_BANKS(ADDR_SURF_16_BANK));
1000 break;
1001 case 4:
1002 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1005 NUM_BANKS(ADDR_SURF_16_BANK));
1006 break;
1007 case 5:
1008 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1011 NUM_BANKS(ADDR_SURF_8_BANK));
1012 break;
1013 case 6:
1014 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1017 NUM_BANKS(ADDR_SURF_4_BANK));
1018 break;
1019 case 8:
1020 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1023 NUM_BANKS(ADDR_SURF_16_BANK));
1024 break;
1025 case 9:
1026 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1029 NUM_BANKS(ADDR_SURF_16_BANK));
1030 break;
1031 case 10:
1032 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1035 NUM_BANKS(ADDR_SURF_16_BANK));
1036 break;
1037 case 11:
1038 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1041 NUM_BANKS(ADDR_SURF_16_BANK));
1042 break;
1043 case 12:
1044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1047 NUM_BANKS(ADDR_SURF_16_BANK));
1048 break;
1049 case 13:
1050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1053 NUM_BANKS(ADDR_SURF_8_BANK));
1054 break;
1055 case 14:
1056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1059 NUM_BANKS(ADDR_SURF_4_BANK));
1060 break;
1061 default:
1062 gb_tile_moden = 0;
1063 break;
1064 }
1065 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1066 }
1067 } else if (num_pipe_configs == 2) {
1068 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1069 switch (reg_offset) {
1070 case 0:
1071 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1073 PIPE_CONFIG(ADDR_SURF_P2) |
1074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1075 break;
1076 case 1:
1077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1079 PIPE_CONFIG(ADDR_SURF_P2) |
1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1081 break;
1082 case 2:
1083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1085 PIPE_CONFIG(ADDR_SURF_P2) |
1086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1087 break;
1088 case 3:
1089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1091 PIPE_CONFIG(ADDR_SURF_P2) |
1092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1093 break;
1094 case 4:
1095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1097 PIPE_CONFIG(ADDR_SURF_P2) |
1098 TILE_SPLIT(split_equal_to_row_size));
1099 break;
1100 case 5:
1101 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1103 break;
1104 case 6:
1105 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1107 PIPE_CONFIG(ADDR_SURF_P2) |
1108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1109 break;
1110 case 7:
1111 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1113 PIPE_CONFIG(ADDR_SURF_P2) |
1114 TILE_SPLIT(split_equal_to_row_size));
1115 break;
1116 case 8:
1117 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1118 break;
1119 case 9:
1120 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1122 break;
1123 case 10:
1124 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1126 PIPE_CONFIG(ADDR_SURF_P2) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1128 break;
1129 case 11:
1130 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1131 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1132 PIPE_CONFIG(ADDR_SURF_P2) |
1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1134 break;
1135 case 12:
1136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1138 PIPE_CONFIG(ADDR_SURF_P2) |
1139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1140 break;
1141 case 13:
1142 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1144 break;
1145 case 14:
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P2) |
1149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1150 break;
1151 case 16:
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P2) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1156 break;
1157 case 17:
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P2) |
1161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1162 break;
1163 case 27:
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1166 break;
1167 case 28:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P2) |
1171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1172 break;
1173 case 29:
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P2) |
1177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1178 break;
1179 case 30:
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P2) |
1183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1184 break;
1185 default:
1186 gb_tile_moden = 0;
1187 break;
1188 }
1189 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1190 }
1191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1192 switch (reg_offset) {
1193 case 0:
1194 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 break;
1199 case 1:
1200 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1203 NUM_BANKS(ADDR_SURF_16_BANK));
1204 break;
1205 case 2:
1206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1209 NUM_BANKS(ADDR_SURF_16_BANK));
1210 break;
1211 case 3:
1212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1215 NUM_BANKS(ADDR_SURF_16_BANK));
1216 break;
1217 case 4:
1218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1221 NUM_BANKS(ADDR_SURF_16_BANK));
1222 break;
1223 case 5:
1224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1227 NUM_BANKS(ADDR_SURF_16_BANK));
1228 break;
1229 case 6:
1230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1233 NUM_BANKS(ADDR_SURF_8_BANK));
1234 break;
1235 case 8:
1236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1239 NUM_BANKS(ADDR_SURF_16_BANK));
1240 break;
1241 case 9:
1242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1245 NUM_BANKS(ADDR_SURF_16_BANK));
1246 break;
1247 case 10:
1248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1251 NUM_BANKS(ADDR_SURF_16_BANK));
1252 break;
1253 case 11:
1254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1257 NUM_BANKS(ADDR_SURF_16_BANK));
1258 break;
1259 case 12:
1260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1263 NUM_BANKS(ADDR_SURF_16_BANK));
1264 break;
1265 case 13:
1266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1269 NUM_BANKS(ADDR_SURF_16_BANK));
1270 break;
1271 case 14:
1272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1275 NUM_BANKS(ADDR_SURF_8_BANK));
1276 break;
1277 default:
1278 gb_tile_moden = 0;
1279 break;
1280 }
1281 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1282 }
1283 } else
1284 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1285}
1286
1287/**
1288 * cik_select_se_sh - select which SE, SH to address
1289 *
1290 * @rdev: radeon_device pointer
1291 * @se_num: shader engine to address
1292 * @sh_num: sh block to address
1293 *
1294 * Select which SE, SH combinations to address. Certain
1295 * registers are instanced per SE or SH. 0xffffffff means
1296 * broadcast to all SEs or SHs (CIK).
1297 */
1298static void cik_select_se_sh(struct radeon_device *rdev,
1299 u32 se_num, u32 sh_num)
1300{
1301 u32 data = INSTANCE_BROADCAST_WRITES;
1302
1303 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1304 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1305 else if (se_num == 0xffffffff)
1306 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1307 else if (sh_num == 0xffffffff)
1308 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1309 else
1310 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1311 WREG32(GRBM_GFX_INDEX, data);
1312}
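/*
 * Illustrative usage sketch: per-SE/SH registers are typically programmed
 * by selecting one instance, writing the register, then restoring broadcast
 * mode so later writes hit every instance (see cik_setup_rb() below):
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	WREG32(PA_SC_RASTER_CONFIG, value);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */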
1313
1314/**
1315 * cik_create_bitmask - create a bitmask
1316 *
1317 * @bit_width: length of the mask
1318 *
1319 * create a variable length bit mask (CIK).
1320 * Returns the bitmask.
1321 */
1322static u32 cik_create_bitmask(u32 bit_width)
1323{
1324 u32 i, mask = 0;
1325
1326 for (i = 0; i < bit_width; i++) {
1327 mask <<= 1;
1328 mask |= 1;
1329 }
1330 return mask;
1331}
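/*
 * Worked example: cik_create_bitmask(4) returns 0xf.  For bit_width < 32
 * the loop is equivalent to (1 << bit_width) - 1.
 */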
1332
1333/**
 1334 * cik_get_rb_disabled - compute the bitmask of disabled RBs
1335 *
1336 * @rdev: radeon_device pointer
1337 * @max_rb_num: max RBs (render backends) for the asic
1338 * @se_num: number of SEs (shader engines) for the asic
1339 * @sh_per_se: number of SH blocks per SE for the asic
1340 *
1341 * Calculates the bitmask of disabled RBs (CIK).
1342 * Returns the disabled RB bitmask.
1343 */
1344static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1345 u32 max_rb_num, u32 se_num,
1346 u32 sh_per_se)
1347{
1348 u32 data, mask;
1349
1350 data = RREG32(CC_RB_BACKEND_DISABLE);
1351 if (data & 1)
1352 data &= BACKEND_DISABLE_MASK;
1353 else
1354 data = 0;
1355 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1356
1357 data >>= BACKEND_DISABLE_SHIFT;
1358
1359 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1360
1361 return data & mask;
1362}
1363
1364/**
1365 * cik_setup_rb - setup the RBs on the asic
1366 *
1367 * @rdev: radeon_device pointer
1368 * @se_num: number of SEs (shader engines) for the asic
1369 * @sh_per_se: number of SH blocks per SE for the asic
1370 * @max_rb_num: max RBs (render backends) for the asic
1371 *
1372 * Configures per-SE/SH RB registers (CIK).
1373 */
1374static void cik_setup_rb(struct radeon_device *rdev,
1375 u32 se_num, u32 sh_per_se,
1376 u32 max_rb_num)
1377{
1378 int i, j;
1379 u32 data, mask;
1380 u32 disabled_rbs = 0;
1381 u32 enabled_rbs = 0;
1382
1383 for (i = 0; i < se_num; i++) {
1384 for (j = 0; j < sh_per_se; j++) {
1385 cik_select_se_sh(rdev, i, j);
1386 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1387 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1388 }
1389 }
1390 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1391
1392 mask = 1;
1393 for (i = 0; i < max_rb_num; i++) {
1394 if (!(disabled_rbs & mask))
1395 enabled_rbs |= mask;
1396 mask <<= 1;
1397 }
1398
1399 for (i = 0; i < se_num; i++) {
1400 cik_select_se_sh(rdev, i, 0xffffffff);
1401 data = 0;
1402 for (j = 0; j < sh_per_se; j++) {
1403 switch (enabled_rbs & 3) {
1404 case 1:
1405 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1406 break;
1407 case 2:
1408 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1409 break;
1410 case 3:
1411 default:
1412 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1413 break;
1414 }
1415 enabled_rbs >>= 2;
1416 }
1417 WREG32(PA_SC_RASTER_CONFIG, data);
1418 }
1419 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1420}
1421
1422/**
1423 * cik_gpu_init - setup the 3D engine
1424 *
1425 * @rdev: radeon_device pointer
1426 *
1427 * Configures the 3D engine and tiling configuration
1428 * registers so that the 3D engine is usable.
1429 */
1430static void cik_gpu_init(struct radeon_device *rdev)
1431{
1432 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1433 u32 mc_shared_chmap, mc_arb_ramcfg;
1434 u32 hdp_host_path_cntl;
1435 u32 tmp;
1436 int i, j;
1437
1438 switch (rdev->family) {
1439 case CHIP_BONAIRE:
1440 rdev->config.cik.max_shader_engines = 2;
1441 rdev->config.cik.max_tile_pipes = 4;
1442 rdev->config.cik.max_cu_per_sh = 7;
1443 rdev->config.cik.max_sh_per_se = 1;
1444 rdev->config.cik.max_backends_per_se = 2;
1445 rdev->config.cik.max_texture_channel_caches = 4;
1446 rdev->config.cik.max_gprs = 256;
1447 rdev->config.cik.max_gs_threads = 32;
1448 rdev->config.cik.max_hw_contexts = 8;
1449
1450 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1451 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1452 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1453 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1454 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1455 break;
1456 case CHIP_KAVERI:
1457 /* TODO */
1458 break;
1459 case CHIP_KABINI:
1460 default:
1461 rdev->config.cik.max_shader_engines = 1;
1462 rdev->config.cik.max_tile_pipes = 2;
1463 rdev->config.cik.max_cu_per_sh = 2;
1464 rdev->config.cik.max_sh_per_se = 1;
1465 rdev->config.cik.max_backends_per_se = 1;
1466 rdev->config.cik.max_texture_channel_caches = 2;
1467 rdev->config.cik.max_gprs = 256;
1468 rdev->config.cik.max_gs_threads = 16;
1469 rdev->config.cik.max_hw_contexts = 8;
1470
1471 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1472 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1473 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1474 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1475 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1476 break;
1477 }
1478
1479 /* Initialize HDP */
1480 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1481 WREG32((0x2c14 + j), 0x00000000);
1482 WREG32((0x2c18 + j), 0x00000000);
1483 WREG32((0x2c1c + j), 0x00000000);
1484 WREG32((0x2c20 + j), 0x00000000);
1485 WREG32((0x2c24 + j), 0x00000000);
1486 }
1487
1488 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1489
1490 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1491
1492 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1493 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1494
1495 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1496 rdev->config.cik.mem_max_burst_length_bytes = 256;
1497 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1498 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1499 if (rdev->config.cik.mem_row_size_in_kb > 4)
1500 rdev->config.cik.mem_row_size_in_kb = 4;
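	/*
	 * Worked example (illustrative): tmp = 0 gives a row size of
	 * (4 * (1 << 8)) / 1024 = 1 KB, tmp = 1 gives 2 KB and tmp = 2
	 * gives 4 KB, which is also the clamp applied above.
	 */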
1501 /* XXX use MC settings? */
1502 rdev->config.cik.shader_engine_tile_size = 32;
1503 rdev->config.cik.num_gpus = 1;
1504 rdev->config.cik.multi_gpu_tile_size = 64;
1505
1506 /* fix up row size */
1507 gb_addr_config &= ~ROW_SIZE_MASK;
1508 switch (rdev->config.cik.mem_row_size_in_kb) {
1509 case 1:
1510 default:
1511 gb_addr_config |= ROW_SIZE(0);
1512 break;
1513 case 2:
1514 gb_addr_config |= ROW_SIZE(1);
1515 break;
1516 case 4:
1517 gb_addr_config |= ROW_SIZE(2);
1518 break;
1519 }
1520
1521 /* setup tiling info dword. gb_addr_config is not adequate since it does
1522 * not have bank info, so create a custom tiling dword.
1523 * bits 3:0 num_pipes
1524 * bits 7:4 num_banks
1525 * bits 11:8 group_size
1526 * bits 15:12 row_size
1527 */
1528 rdev->config.cik.tile_config = 0;
1529 switch (rdev->config.cik.num_tile_pipes) {
1530 case 1:
1531 rdev->config.cik.tile_config |= (0 << 0);
1532 break;
1533 case 2:
1534 rdev->config.cik.tile_config |= (1 << 0);
1535 break;
1536 case 4:
1537 rdev->config.cik.tile_config |= (2 << 0);
1538 break;
1539 case 8:
1540 default:
1541 /* XXX what about 12? */
1542 rdev->config.cik.tile_config |= (3 << 0);
1543 break;
1544 }
1545 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1546 rdev->config.cik.tile_config |= 1 << 4;
1547 else
1548 rdev->config.cik.tile_config |= 0 << 4;
1549 rdev->config.cik.tile_config |=
1550 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1551 rdev->config.cik.tile_config |=
1552 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1553
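	/*
	 * Illustrative decode of the packed dword built above (a sketch,
	 * not a defined helper in this driver):
	 *
	 *	num_pipes  = 1 << (tile_config & 0xf);
	 *	bank_field = (tile_config >> 4) & 0xf;
	 *	group_size = (tile_config >> 8) & 0xf;
	 *	row_size   = (tile_config >> 12) & 0xf;
	 */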
1554 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1555 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1556 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1557 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1558 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1559 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1560 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1561 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1562
1563 cik_tiling_mode_table_init(rdev);
1564
1565 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1566 rdev->config.cik.max_sh_per_se,
1567 rdev->config.cik.max_backends_per_se);
1568
1569 /* set HW defaults for 3D engine */
1570 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1571
1572 WREG32(SX_DEBUG_1, 0x20);
1573
1574 WREG32(TA_CNTL_AUX, 0x00010000);
1575
1576 tmp = RREG32(SPI_CONFIG_CNTL);
1577 tmp |= 0x03000000;
1578 WREG32(SPI_CONFIG_CNTL, tmp);
1579
1580 WREG32(SQ_CONFIG, 1);
1581
1582 WREG32(DB_DEBUG, 0);
1583
1584 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1585 tmp |= 0x00000400;
1586 WREG32(DB_DEBUG2, tmp);
1587
1588 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1589 tmp |= 0x00020200;
1590 WREG32(DB_DEBUG3, tmp);
1591
1592 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1593 tmp |= 0x00018208;
1594 WREG32(CB_HW_CONTROL, tmp);
1595
1596 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1597
1598 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1599 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1600 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1601 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1602
1603 WREG32(VGT_NUM_INSTANCES, 1);
1604
1605 WREG32(CP_PERFMON_CNTL, 0);
1606
1607 WREG32(SQ_CONFIG, 0);
1608
1609 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1610 FORCE_EOV_MAX_REZ_CNT(255)));
1611
1612 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1613 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1614
1615 WREG32(VGT_GS_VERTEX_REUSE, 16);
1616 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1617
1618 tmp = RREG32(HDP_MISC_CNTL);
1619 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1620 WREG32(HDP_MISC_CNTL, tmp);
1621
1622 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1623 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1624
1625 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1626 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1627
1628 udelay(50);
1629}
1630
1631/*
 1632 * GPU scratch registers helper functions.
1633 */
1634/**
1635 * cik_scratch_init - setup driver info for CP scratch regs
1636 *
1637 * @rdev: radeon_device pointer
1638 *
1639 * Set up the number and offset of the CP scratch registers.
 1640 * NOTE: use of CP scratch registers is a legacy interface and
1641 * is not used by default on newer asics (r6xx+). On newer asics,
1642 * memory buffers are used for fences rather than scratch regs.
1643 */
1644static void cik_scratch_init(struct radeon_device *rdev)
1645{
1646 int i;
1647
1648 rdev->scratch.num_reg = 7;
1649 rdev->scratch.reg_base = SCRATCH_REG0;
1650 for (i = 0; i < rdev->scratch.num_reg; i++) {
1651 rdev->scratch.free[i] = true;
1652 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1653 }
1654}
1655
1656/**
 1657 * cik_ring_test - basic gfx ring test
1658 *
1659 * @rdev: radeon_device pointer
1660 * @ring: radeon_ring structure holding ring information
1661 *
1662 * Allocate a scratch register and write to it using the gfx ring (CIK).
1663 * Provides a basic gfx ring test to verify that the ring is working.
1664 * Used by cik_cp_gfx_resume();
1665 * Returns 0 on success, error on failure.
1666 */
1667int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1668{
1669 uint32_t scratch;
1670 uint32_t tmp = 0;
1671 unsigned i;
1672 int r;
1673
1674 r = radeon_scratch_get(rdev, &scratch);
1675 if (r) {
1676 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1677 return r;
1678 }
1679 WREG32(scratch, 0xCAFEDEAD);
1680 r = radeon_ring_lock(rdev, ring, 3);
1681 if (r) {
1682 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1683 radeon_scratch_free(rdev, scratch);
1684 return r;
1685 }
1686 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1687 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1688 radeon_ring_write(ring, 0xDEADBEEF);
1689 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04001690
Alex Deucherfbc832c2012-07-20 14:41:35 -04001691 for (i = 0; i < rdev->usec_timeout; i++) {
1692 tmp = RREG32(scratch);
1693 if (tmp == 0xDEADBEEF)
1694 break;
1695 DRM_UDELAY(1);
1696 }
1697 if (i < rdev->usec_timeout) {
1698 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1699 } else {
1700 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1701 ring->idx, scratch, tmp);
1702 r = -EINVAL;
1703 }
1704 radeon_scratch_free(rdev, scratch);
1705 return r;
1706}
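/* For reference, the ring test above submits a single 3-dword PM4 packet.
 * A minimal sketch of the dwords emitted (the register offset is relative
 * to PACKET3_SET_UCONFIG_REG_START, in dword units):
 *
 *   dw0: PACKET3(PACKET3_SET_UCONFIG_REG, 1)             packet header
 *   dw1: (scratch - PACKET3_SET_UCONFIG_REG_START) >> 2  register offset
 *   dw2: 0xDEADBEEF                                       value to write
 *
 * The CP writes 0xDEADBEEF into the scratch register; the test then polls
 * the register with RREG32() until the value shows up or the usec timeout
 * expires.
 */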
1707
1708/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04001709 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001710 *
1711 * @rdev: radeon_device pointer
1712 * @fence: radeon fence object
1713 *
1714 * Emits a fence sequence number on the gfx ring and flushes
1715 * GPU caches.
1716 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04001717void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
1718 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001719{
1720 struct radeon_ring *ring = &rdev->ring[fence->ring];
1721 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1722
1723 /* EVENT_WRITE_EOP - flush caches, send int */
1724 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1725 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1726 EOP_TC_ACTION_EN |
1727 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1728 EVENT_INDEX(5)));
1729 radeon_ring_write(ring, addr & 0xfffffffc);
1730 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1731 radeon_ring_write(ring, fence->seq);
1732 radeon_ring_write(ring, 0);
1733 /* HDP flush */
1734 /* We should be using the new WAIT_REG_MEM special op packet here
1735 * but it causes the CP to hang
1736 */
1737 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1738 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1739 WRITE_DATA_DST_SEL(0)));
1740 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1741 radeon_ring_write(ring, 0);
1742 radeon_ring_write(ring, 0);
1743}
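/* A sketch of the EVENT_WRITE_EOP packet emitted above, with field
 * meanings inferred from how this function builds it (exact encodings
 * live in cikd.h):
 *
 *   dw0: PACKET3(PACKET3_EVENT_WRITE_EOP, 4)   header
 *   dw1: cache action bits | event type | event index
 *   dw2: fence address bits 31:2 (dword aligned)
 *   dw3: fence address bits 47:32 | DATA_SEL(1) | INT_SEL(2)
 *   dw4: fence sequence number (lower 32 bits)
 *   dw5: 0
 *
 * As used here, DATA_SEL(1) requests a 32-bit data write of the sequence
 * number and INT_SEL(2) an interrupt once that write lands, which is how
 * fence completion gets signalled back to the driver.
 */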
1744
Alex Deucherb07fdd32013-04-11 09:36:17 -04001745/**
1746 * cik_fence_compute_ring_emit - emit a fence on the compute ring
1747 *
1748 * @rdev: radeon_device pointer
1749 * @fence: radeon fence object
1750 *
1751 * Emits a fence sequence number on the compute ring and flushes
1752 * GPU caches.
1753 */
1754void cik_fence_compute_ring_emit(struct radeon_device *rdev,
1755 struct radeon_fence *fence)
1756{
1757 struct radeon_ring *ring = &rdev->ring[fence->ring];
1758 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1759
1760 /* RELEASE_MEM - flush caches, send int */
1761 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
1762 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1763 EOP_TC_ACTION_EN |
1764 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1765 EVENT_INDEX(5)));
1766 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
1767 radeon_ring_write(ring, addr & 0xfffffffc);
1768 radeon_ring_write(ring, upper_32_bits(addr));
1769 radeon_ring_write(ring, fence->seq);
1770 radeon_ring_write(ring, 0);
1771 /* HDP flush */
1772 /* We should be using the new WAIT_REG_MEM special op packet here
1773 * but it causes the CP to hang
1774 */
1775 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1776 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1777 WRITE_DATA_DST_SEL(0)));
1778 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1779 radeon_ring_write(ring, 0);
1780 radeon_ring_write(ring, 0);
1781}
1782
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001783void cik_semaphore_ring_emit(struct radeon_device *rdev,
1784 struct radeon_ring *ring,
1785 struct radeon_semaphore *semaphore,
1786 bool emit_wait)
1787{
1788 uint64_t addr = semaphore->gpu_addr;
1789 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1790
1791 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1792 radeon_ring_write(ring, addr & 0xffffffff);
1793 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1794}
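/* A minimal sketch of the MEM_SEMAPHORE packet above: the semaphore is a
 * 64-bit value in GPU memory, so the address is split across two dwords
 * and the wait/signal selector rides in the high dword:
 *
 *   dw0: PACKET3(PACKET3_MEM_SEMAPHORE, 1)
 *   dw1: semaphore address bits 31:0
 *   dw2: semaphore address bits 47:32 | PACKET3_SEM_SEL_WAIT or _SIGNAL
 *
 * This follows the usual radeon semaphore scheme: a signal packet bumps
 * the semaphore value, a wait packet stalls the ring until it is
 * non-zero.
 */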
1795
1796/*
1797 * IB stuff
1798 */
1799/**
1800 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1801 *
1802 * @rdev: radeon_device pointer
1803 * @ib: radeon indirect buffer object
1804 *
1805 * Emits a DE (drawing engine) or CE (constant engine) IB
1806 * on the gfx ring. IBs are usually generated by userspace
1807 * acceleration drivers and submitted to the kernel for
1808 * scheduling on the ring. This function schedules the IB
1809 * on the gfx ring for execution by the GPU.
1810 */
1811void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1812{
1813 struct radeon_ring *ring = &rdev->ring[ib->ring];
1814 u32 header, control = INDIRECT_BUFFER_VALID;
1815
1816 if (ib->is_const_ib) {
1817 /* set switch buffer packet before const IB */
1818 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1819 radeon_ring_write(ring, 0);
1820
1821 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1822 } else {
1823 u32 next_rptr;
1824 if (ring->rptr_save_reg) {
1825 next_rptr = ring->wptr + 3 + 4;
1826 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1827 radeon_ring_write(ring, ((ring->rptr_save_reg -
1828 PACKET3_SET_UCONFIG_REG_START) >> 2));
1829 radeon_ring_write(ring, next_rptr);
1830 } else if (rdev->wb.enabled) {
1831 next_rptr = ring->wptr + 5 + 4;
1832 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1833 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1834 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1835 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1836 radeon_ring_write(ring, next_rptr);
1837 }
1838
1839 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1840 }
1841
1842 control |= ib->length_dw |
1843 (ib->vm ? (ib->vm->id << 24) : 0);
1844
1845 radeon_ring_write(ring, header);
1846 radeon_ring_write(ring,
1847#ifdef __BIG_ENDIAN
1848 (2 << 0) |
1849#endif
1850 (ib->gpu_addr & 0xFFFFFFFC));
1851 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1852 radeon_ring_write(ring, control);
1853}
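/* The control dword built above packs the IB size and VM id together.
 * A small sketch of the layout as this function uses it (derived from
 * the shifts in the code, not from a register spec):
 *
 *   control = INDIRECT_BUFFER_VALID
 *           | ib->length_dw        (IB length in dwords, low bits)
 *           | (ib->vm->id << 24)   (VM id, 0 when no VM is attached)
 *
 * The optional next_rptr write-ahead at the top of the function accounts
 * for its own footprint: 3 + 4 dwords when using the rptr save register,
 * 5 + 4 dwords when using the writeback buffer, where the final 4 dwords
 * are the INDIRECT_BUFFER packet itself.
 */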
1854
Alex Deucherfbc832c2012-07-20 14:41:35 -04001855/**
1856 * cik_ib_test - basic gfx ring IB test
1857 *
1858 * @rdev: radeon_device pointer
1859 * @ring: radeon_ring structure holding ring information
1860 *
1861 * Allocate an IB and execute it on the gfx ring (CIK).
1862 * Provides a basic gfx ring test to verify that IBs are working.
1863 * Returns 0 on success, error on failure.
1864 */
1865int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1866{
1867 struct radeon_ib ib;
1868 uint32_t scratch;
1869 uint32_t tmp = 0;
1870 unsigned i;
1871 int r;
1872
1873 r = radeon_scratch_get(rdev, &scratch);
1874 if (r) {
1875 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1876 return r;
1877 }
1878 WREG32(scratch, 0xCAFEDEAD);
1879 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1880 if (r) {
1881 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1882 return r;
1883 }
1884 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1885 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1886 ib.ptr[2] = 0xDEADBEEF;
1887 ib.length_dw = 3;
1888 r = radeon_ib_schedule(rdev, &ib, NULL);
1889 if (r) {
1890 radeon_scratch_free(rdev, scratch);
1891 radeon_ib_free(rdev, &ib);
1892 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1893 return r;
1894 }
1895 r = radeon_fence_wait(ib.fence, false);
1896 if (r) {
1897 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1898 return r;
1899 }
1900 for (i = 0; i < rdev->usec_timeout; i++) {
1901 tmp = RREG32(scratch);
1902 if (tmp == 0xDEADBEEF)
1903 break;
1904 DRM_UDELAY(1);
1905 }
1906 if (i < rdev->usec_timeout) {
1907 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1908 } else {
1909 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1910 scratch, tmp);
1911 r = -EINVAL;
1912 }
1913 radeon_scratch_free(rdev, scratch);
1914 radeon_ib_free(rdev, &ib);
1915 return r;
1916}
1917
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001918/*
Alex Deucher841cf442012-12-18 21:47:44 -05001919 * CP.
1920 * On CIK, gfx and compute now have independent command processors.
1921 *
1922 * GFX
1923 * Gfx consists of a single ring and can process both gfx jobs and
1924 * compute jobs. The gfx CP consists of three microengines (ME):
1925 * PFP - Pre-Fetch Parser
1926 * ME - Micro Engine
1927 * CE - Constant Engine
1928 * The PFP and ME make up what is considered the Drawing Engine (DE).
1929 * The CE is an asynchronous engine used for updating buffer descriptors
1930 * used by the DE so that they can be loaded into cache in parallel
1931 * while the DE is processing state update packets.
1932 *
1933 * Compute
1934 * The compute CP consists of two microengines (ME):
1935 * MEC1 - Compute MicroEngine 1
1936 * MEC2 - Compute MicroEngine 2
1937 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1938 * The queues are exposed to userspace and are programmed directly
1939 * by the compute runtime.
1940 */
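/* Putting the numbers above together, a hypothetical walk over every
 * compute queue on Kaveri would look like the sketch below (illustrative
 * only; the driver itself only exposes two of these queues as rings):
 *
 *   for (me = 1; me <= 2; me++)                  2 MECs
 *       for (pipe = 0; pipe < 4; pipe++)         4 pipes per MEC
 *           for (queue = 0; queue < 8; queue++)  8 queues per pipe
 *               ...                              64 queues total
 *
 * Bonaire and Kabini have a single MEC, so the same walk yields 32 queues.
 */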
1941/**
1942 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1943 *
1944 * @rdev: radeon_device pointer
1945 * @enable: enable or disable the MEs
1946 *
1947 * Halts or unhalts the gfx MEs.
1948 */
1949static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1950{
1951 if (enable)
1952 WREG32(CP_ME_CNTL, 0);
1953 else {
1954 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1955 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1956 }
1957 udelay(50);
1958}
1959
1960/**
1961 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1962 *
1963 * @rdev: radeon_device pointer
1964 *
1965 * Loads the gfx PFP, ME, and CE ucode.
1966 * Returns 0 for success, -EINVAL if the ucode is not available.
1967 */
1968static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1969{
1970 const __be32 *fw_data;
1971 int i;
1972
1973 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1974 return -EINVAL;
1975
1976 cik_cp_gfx_enable(rdev, false);
1977
1978 /* PFP */
1979 fw_data = (const __be32 *)rdev->pfp_fw->data;
1980 WREG32(CP_PFP_UCODE_ADDR, 0);
1981 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1982 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1983 WREG32(CP_PFP_UCODE_ADDR, 0);
1984
1985 /* CE */
1986 fw_data = (const __be32 *)rdev->ce_fw->data;
1987 WREG32(CP_CE_UCODE_ADDR, 0);
1988 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1989 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1990 WREG32(CP_CE_UCODE_ADDR, 0);
1991
1992 /* ME */
1993 fw_data = (const __be32 *)rdev->me_fw->data;
1994 WREG32(CP_ME_RAM_WADDR, 0);
1995 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1996 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1997 WREG32(CP_ME_RAM_WADDR, 0);
1998
1999 WREG32(CP_PFP_UCODE_ADDR, 0);
2000 WREG32(CP_CE_UCODE_ADDR, 0);
2001 WREG32(CP_ME_RAM_WADDR, 0);
2002 WREG32(CP_ME_RAM_RADDR, 0);
2003 return 0;
2004}
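/* The firmware blobs are stored as big-endian 32-bit words, which is why
 * each load above casts ->data to __be32 and converts with be32_to_cpup()
 * before streaming it through the data register (write address 0, stream
 * the words, write address 0 again). The word counts (CIK_PFP_UCODE_SIZE
 * and friends) are fixed at the top of this file; as a sketch only, not
 * something this function does, one could sanity check the blob with:
 *
 *   BUG_ON(rdev->pfp_fw->size < CIK_PFP_UCODE_SIZE * 4);
 */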
2005
2006/**
2007 * cik_cp_gfx_start - start the gfx ring
2008 *
2009 * @rdev: radeon_device pointer
2010 *
2011 * Enables the ring and loads the clear state context and other
2012 * packets required to init the ring.
2013 * Returns 0 for success, error for failure.
2014 */
2015static int cik_cp_gfx_start(struct radeon_device *rdev)
2016{
2017 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2018 int r, i;
2019
2020 /* init the CP */
2021 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2022 WREG32(CP_ENDIAN_SWAP, 0);
2023 WREG32(CP_DEVICE_ID, 1);
2024
2025 cik_cp_gfx_enable(rdev, true);
2026
2027 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2028 if (r) {
2029 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2030 return r;
2031 }
2032
2033 /* init the CE partitions. CE only used for gfx on CIK */
2034 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2035 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2036 radeon_ring_write(ring, 0xc000);
2037 radeon_ring_write(ring, 0xc000);
2038
2039 /* setup clear context state */
2040 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2041 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2042
2043 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2044 radeon_ring_write(ring, 0x80000000);
2045 radeon_ring_write(ring, 0x80000000);
2046
2047 for (i = 0; i < cik_default_size; i++)
2048 radeon_ring_write(ring, cik_default_state[i]);
2049
2050 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2051 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2052
2053 /* set clear context state */
2054 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2055 radeon_ring_write(ring, 0);
2056
2057 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2058 radeon_ring_write(ring, 0x00000316);
2059 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2060 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2061
2062 radeon_ring_unlock_commit(rdev, ring);
2063
2064 return 0;
2065}
2066
2067/**
2068 * cik_cp_gfx_fini - stop the gfx ring
2069 *
2070 * @rdev: radeon_device pointer
2071 *
2072 * Stop the gfx ring and tear down the driver ring
2073 * info.
2074 */
2075static void cik_cp_gfx_fini(struct radeon_device *rdev)
2076{
2077 cik_cp_gfx_enable(rdev, false);
2078 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2079}
2080
2081/**
2082 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2083 *
2084 * @rdev: radeon_device pointer
2085 *
2086 * Program the location and size of the gfx ring buffer
2087 * and test it to make sure it's working.
2088 * Returns 0 for success, error for failure.
2089 */
2090static int cik_cp_gfx_resume(struct radeon_device *rdev)
2091{
2092 struct radeon_ring *ring;
2093 u32 tmp;
2094 u32 rb_bufsz;
2095 u64 rb_addr;
2096 int r;
2097
2098 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2099 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2100
2101 /* Set the write pointer delay */
2102 WREG32(CP_RB_WPTR_DELAY, 0);
2103
2104 /* set the RB to use vmid 0 */
2105 WREG32(CP_RB_VMID, 0);
2106
2107 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2108
2109 /* ring 0 - compute and gfx */
2110 /* Set ring buffer size */
2111 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2112 rb_bufsz = drm_order(ring->ring_size / 8);
2113 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2114#ifdef __BIG_ENDIAN
2115 tmp |= BUF_SWAP_32BIT;
2116#endif
2117 WREG32(CP_RB0_CNTL, tmp);
2118
2119 /* Initialize the ring buffer's read and write pointers */
2120 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2121 ring->wptr = 0;
2122 WREG32(CP_RB0_WPTR, ring->wptr);
2123
2124 /* set the wb address whether it's enabled or not */
2125 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2126 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2127
2128 /* scratch register shadowing is no longer supported */
2129 WREG32(SCRATCH_UMSK, 0);
2130
2131 if (!rdev->wb.enabled)
2132 tmp |= RB_NO_UPDATE;
2133
2134 mdelay(1);
2135 WREG32(CP_RB0_CNTL, tmp);
2136
2137 rb_addr = ring->gpu_addr >> 8;
2138 WREG32(CP_RB0_BASE, rb_addr);
2139 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2140
2141 ring->rptr = RREG32(CP_RB0_RPTR);
2142
2143 /* start the ring */
2144 cik_cp_gfx_start(rdev);
2145 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2146 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2147 if (r) {
2148 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2149 return r;
2150 }
2151 return 0;
2152}
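/* A worked example of the ring size encoding above, assuming (as the code
 * implies) that RB_BUFSZ is the log2 of the ring size in 8-byte quadwords
 * and the second field the log2 of the rptr writeback interval:
 *
 *   ring_size = 1 MB = 1048576 bytes
 *   rb_bufsz  = drm_order(1048576 / 8) = drm_order(131072) = 17
 *   blksz     = drm_order(RADEON_GPU_PAGE_SIZE / 8) = drm_order(512) = 9
 *   tmp       = (9 << 8) | 17 = 0x911
 *
 * so CP_RB0_CNTL ends up as 0x911 before the endian-swap and RB_NO_UPDATE
 * bits are factored in.
 */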
2153
Alex Deucher963e81f2013-06-26 17:37:11 -04002154u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2155 struct radeon_ring *ring)
2156{
2157 u32 rptr;
2158
2159
2160
2161 if (rdev->wb.enabled) {
2162 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2163 } else {
2164 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2165 rptr = RREG32(CP_HQD_PQ_RPTR);
2166 cik_srbm_select(rdev, 0, 0, 0, 0);
2167 }
2168 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2169
2170 return rptr;
2171}
2172
2173u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2174 struct radeon_ring *ring)
2175{
2176 u32 wptr;
2177
2178 if (rdev->wb.enabled) {
2179 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2180 } else {
2181 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2182 wptr = RREG32(CP_HQD_PQ_WPTR);
2183 cik_srbm_select(rdev, 0, 0, 0, 0);
2184 }
2185 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2186
2187 return wptr;
2188}
2189
2190void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2191 struct radeon_ring *ring)
2192{
2193 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2194
2195 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2196 WDOORBELL32(ring->doorbell_offset, wptr);
2197}
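/* Unlike the gfx ring, the compute rings are kicked through doorbells:
 * set_wptr above mirrors the new write pointer into the writeback buffer
 * and then writes it to the ring's doorbell with WDOORBELL32(), which is
 * what actually tells the MEC that new packets are available. The
 * ptr_reg_shift/ptr_reg_mask handling matches the get_rptr/get_wptr
 * helpers above so all three agree on how the pointer value is encoded.
 */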
2198
Alex Deucher841cf442012-12-18 21:47:44 -05002199/**
2200 * cik_cp_compute_enable - enable/disable the compute CP MEs
2201 *
2202 * @rdev: radeon_device pointer
2203 * @enable: enable or disable the MEs
2204 *
2205 * Halts or unhalts the compute MEs.
2206 */
2207static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2208{
2209 if (enable)
2210 WREG32(CP_MEC_CNTL, 0);
2211 else
2212 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2213 udelay(50);
2214}
2215
2216/**
2217 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2218 *
2219 * @rdev: radeon_device pointer
2220 *
2221 * Loads the compute MEC1&2 ucode.
2222 * Returns 0 for success, -EINVAL if the ucode is not available.
2223 */
2224static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2225{
2226 const __be32 *fw_data;
2227 int i;
2228
2229 if (!rdev->mec_fw)
2230 return -EINVAL;
2231
2232 cik_cp_compute_enable(rdev, false);
2233
2234 /* MEC1 */
2235 fw_data = (const __be32 *)rdev->mec_fw->data;
2236 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2237 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2238 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2239 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2240
2241 if (rdev->family == CHIP_KAVERI) {
2242 /* MEC2 */
2243 fw_data = (const __be32 *)rdev->mec_fw->data;
2244 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2245 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2246 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2247 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2248 }
2249
2250 return 0;
2251}
2252
2253/**
2254 * cik_cp_compute_start - start the compute queues
2255 *
2256 * @rdev: radeon_device pointer
2257 *
2258 * Enable the compute queues.
2259 * Returns 0 for success, error for failure.
2260 */
2261static int cik_cp_compute_start(struct radeon_device *rdev)
2262{
Alex Deucher963e81f2013-06-26 17:37:11 -04002263 cik_cp_compute_enable(rdev, true);
2264
Alex Deucher841cf442012-12-18 21:47:44 -05002265 return 0;
2266}
2267
2268/**
2269 * cik_cp_compute_fini - stop the compute queues
2270 *
2271 * @rdev: radeon_device pointer
2272 *
2273 * Stop the compute queues and tear down the driver queue
2274 * info.
2275 */
2276static void cik_cp_compute_fini(struct radeon_device *rdev)
2277{
Alex Deucher963e81f2013-06-26 17:37:11 -04002278 int i, idx, r;
2279
Alex Deucher841cf442012-12-18 21:47:44 -05002280 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002281
2282 for (i = 0; i < 2; i++) {
2283 if (i == 0)
2284 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2285 else
2286 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2287
2288 if (rdev->ring[idx].mqd_obj) {
2289 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2290 if (unlikely(r != 0))
2291 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2292
2293 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2294 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2295
2296 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2297 rdev->ring[idx].mqd_obj = NULL;
2298 }
2299 }
Alex Deucher841cf442012-12-18 21:47:44 -05002300}
2301
Alex Deucher963e81f2013-06-26 17:37:11 -04002302static void cik_mec_fini(struct radeon_device *rdev)
2303{
2304 int r;
2305
2306 if (rdev->mec.hpd_eop_obj) {
2307 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2308 if (unlikely(r != 0))
2309 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2310 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2311 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2312
2313 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2314 rdev->mec.hpd_eop_obj = NULL;
2315 }
2316}
2317
2318#define MEC_HPD_SIZE 2048
2319
2320static int cik_mec_init(struct radeon_device *rdev)
2321{
2322 int r;
2323 u32 *hpd;
2324
2325 /*
2326 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2327 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2328 */
2329 if (rdev->family == CHIP_KAVERI)
2330 rdev->mec.num_mec = 2;
2331 else
2332 rdev->mec.num_mec = 1;
2333 rdev->mec.num_pipe = 4;
2334 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2335
2336 if (rdev->mec.hpd_eop_obj == NULL) {
2337 r = radeon_bo_create(rdev,
2338 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2339 PAGE_SIZE, true,
2340 RADEON_GEM_DOMAIN_GTT, NULL,
2341 &rdev->mec.hpd_eop_obj);
2342 if (r) {
2343 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2344 return r;
2345 }
2346 }
2347
2348 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2349 if (unlikely(r != 0)) {
2350 cik_mec_fini(rdev);
2351 return r;
2352 }
2353 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2354 &rdev->mec.hpd_eop_gpu_addr);
2355 if (r) {
2356 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2357 cik_mec_fini(rdev);
2358 return r;
2359 }
2360 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2361 if (r) {
2362 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2363 cik_mec_fini(rdev);
2364 return r;
2365 }
2366
2367 /* clear memory. Not sure if this is required or not */
2368 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2369
2370 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2371 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2372
2373 return 0;
2374}
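/* The HPD EOP buffer allocated above is sized as
 *
 *   num_mec * num_pipe * MEC_HPD_SIZE * 2
 *
 * i.e. two MEC_HPD_SIZE (2048 byte) regions per (MEC, pipe) pair. Worked
 * out for the two cases handled here:
 *
 *   Kaveri:          2 * 4 * 2048 * 2 = 32768 bytes (8 pages)
 *   Bonaire/Kabini:  1 * 4 * 2048 * 2 = 16384 bytes (4 pages)
 *
 * The per-pipe offsets used later in cik_cp_compute_resume() follow the
 * same i * MEC_HPD_SIZE * 2 stride.
 */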
2375
2376struct hqd_registers
2377{
2378 u32 cp_mqd_base_addr;
2379 u32 cp_mqd_base_addr_hi;
2380 u32 cp_hqd_active;
2381 u32 cp_hqd_vmid;
2382 u32 cp_hqd_persistent_state;
2383 u32 cp_hqd_pipe_priority;
2384 u32 cp_hqd_queue_priority;
2385 u32 cp_hqd_quantum;
2386 u32 cp_hqd_pq_base;
2387 u32 cp_hqd_pq_base_hi;
2388 u32 cp_hqd_pq_rptr;
2389 u32 cp_hqd_pq_rptr_report_addr;
2390 u32 cp_hqd_pq_rptr_report_addr_hi;
2391 u32 cp_hqd_pq_wptr_poll_addr;
2392 u32 cp_hqd_pq_wptr_poll_addr_hi;
2393 u32 cp_hqd_pq_doorbell_control;
2394 u32 cp_hqd_pq_wptr;
2395 u32 cp_hqd_pq_control;
2396 u32 cp_hqd_ib_base_addr;
2397 u32 cp_hqd_ib_base_addr_hi;
2398 u32 cp_hqd_ib_rptr;
2399 u32 cp_hqd_ib_control;
2400 u32 cp_hqd_iq_timer;
2401 u32 cp_hqd_iq_rptr;
2402 u32 cp_hqd_dequeue_request;
2403 u32 cp_hqd_dma_offload;
2404 u32 cp_hqd_sema_cmd;
2405 u32 cp_hqd_msg_type;
2406 u32 cp_hqd_atomic0_preop_lo;
2407 u32 cp_hqd_atomic0_preop_hi;
2408 u32 cp_hqd_atomic1_preop_lo;
2409 u32 cp_hqd_atomic1_preop_hi;
2410 u32 cp_hqd_hq_scheduler0;
2411 u32 cp_hqd_hq_scheduler1;
2412 u32 cp_mqd_control;
2413};
2414
2415struct bonaire_mqd
2416{
2417 u32 header;
2418 u32 dispatch_initiator;
2419 u32 dimensions[3];
2420 u32 start_idx[3];
2421 u32 num_threads[3];
2422 u32 pipeline_stat_enable;
2423 u32 perf_counter_enable;
2424 u32 pgm[2];
2425 u32 tba[2];
2426 u32 tma[2];
2427 u32 pgm_rsrc[2];
2428 u32 vmid;
2429 u32 resource_limits;
2430 u32 static_thread_mgmt01[2];
2431 u32 tmp_ring_size;
2432 u32 static_thread_mgmt23[2];
2433 u32 restart[3];
2434 u32 thread_trace_enable;
2435 u32 reserved1;
2436 u32 user_data[16];
2437 u32 vgtcs_invoke_count[2];
2438 struct hqd_registers queue_state;
2439 u32 dequeue_cntr;
2440 u32 interrupt_queue[64];
2441};
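/* One bonaire_mqd (memory queue descriptor) is allocated per compute ring
 * in cik_cp_compute_resume() below. Its queue_state member mirrors the
 * CP_HQD_xxx and CP_MQD_xxx registers, so programming follows a
 * read-modify-write pattern: read the current register value into the MQD
 * field, adjust it, then write it back, keeping the in-memory descriptor
 * and the hardware queue descriptor (HQD) consistent.
 */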
2442
Alex Deucher841cf442012-12-18 21:47:44 -05002443/**
2444 * cik_cp_compute_resume - setup the compute queue registers
2445 *
2446 * @rdev: radeon_device pointer
2447 *
2448 * Program the compute queues and test them to make sure they
2449 * are working.
2450 * Returns 0 for success, error for failure.
2451 */
2452static int cik_cp_compute_resume(struct radeon_device *rdev)
2453{
Alex Deucher963e81f2013-06-26 17:37:11 -04002454 int r, i, idx;
2455 u32 tmp;
2456 bool use_doorbell = true;
2457 u64 hqd_gpu_addr;
2458 u64 mqd_gpu_addr;
2459 u64 eop_gpu_addr;
2460 u64 wb_gpu_addr;
2461 u32 *buf;
2462 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002463
Alex Deucher841cf442012-12-18 21:47:44 -05002464 r = cik_cp_compute_start(rdev);
2465 if (r)
2466 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002467
2468 /* fix up chicken bits */
2469 tmp = RREG32(CP_CPF_DEBUG);
2470 tmp |= (1 << 23);
2471 WREG32(CP_CPF_DEBUG, tmp);
2472
2473 /* init the pipes */
2474 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2475 int me = (i < 4) ? 1 : 2;
2476 int pipe = (i < 4) ? i : (i - 4);
2477
2478 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2479
2480 cik_srbm_select(rdev, me, pipe, 0, 0);
2481
2482 /* write the EOP addr */
2483 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2484 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2485
2486 /* set the VMID assigned */
2487 WREG32(CP_HPD_EOP_VMID, 0);
2488
2489 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2490 tmp = RREG32(CP_HPD_EOP_CONTROL);
2491 tmp &= ~EOP_SIZE_MASK;
2492 tmp |= drm_order(MEC_HPD_SIZE / 8);
2493 WREG32(CP_HPD_EOP_CONTROL, tmp);
2494 }
2495 cik_srbm_select(rdev, 0, 0, 0, 0);
2496
2497 /* init the queues. Just two for now. */
2498 for (i = 0; i < 2; i++) {
2499 if (i == 0)
2500 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2501 else
2502 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2503
2504 if (rdev->ring[idx].mqd_obj == NULL) {
2505 r = radeon_bo_create(rdev,
2506 sizeof(struct bonaire_mqd),
2507 PAGE_SIZE, true,
2508 RADEON_GEM_DOMAIN_GTT, NULL,
2509 &rdev->ring[idx].mqd_obj);
2510 if (r) {
2511 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2512 return r;
2513 }
2514 }
2515
2516 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2517 if (unlikely(r != 0)) {
2518 cik_cp_compute_fini(rdev);
2519 return r;
2520 }
2521 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2522 &mqd_gpu_addr);
2523 if (r) {
2524 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2525 cik_cp_compute_fini(rdev);
2526 return r;
2527 }
2528 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2529 if (r) {
2530 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2531 cik_cp_compute_fini(rdev);
2532 return r;
2533 }
2534
2535 /* doorbell offset */
2536 rdev->ring[idx].doorbell_offset =
2537 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2538
2539 /* init the mqd struct */
2540 memset(buf, 0, sizeof(struct bonaire_mqd));
2541
2542 mqd = (struct bonaire_mqd *)buf;
2543 mqd->header = 0xC0310800;
2544 mqd->static_thread_mgmt01[0] = 0xffffffff;
2545 mqd->static_thread_mgmt01[1] = 0xffffffff;
2546 mqd->static_thread_mgmt23[0] = 0xffffffff;
2547 mqd->static_thread_mgmt23[1] = 0xffffffff;
2548
2549 cik_srbm_select(rdev, rdev->ring[idx].me,
2550 rdev->ring[idx].pipe,
2551 rdev->ring[idx].queue, 0);
2552
2553 /* disable wptr polling */
2554 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2555 tmp &= ~WPTR_POLL_EN;
2556 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2557
2558 /* enable doorbell? */
2559 mqd->queue_state.cp_hqd_pq_doorbell_control =
2560 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2561 if (use_doorbell)
2562 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2563 else
2564 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2565 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2566 mqd->queue_state.cp_hqd_pq_doorbell_control);
2567
2568 /* disable the queue if it's active */
2569 mqd->queue_state.cp_hqd_dequeue_request = 0;
2570 mqd->queue_state.cp_hqd_pq_rptr = 0;
2571 mqd->queue_state.cp_hqd_pq_wptr = 0;
2572 if (RREG32(CP_HQD_ACTIVE) & 1) {
2573 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2574 for (i = 0; i < rdev->usec_timeout; i++) {
2575 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2576 break;
2577 udelay(1);
2578 }
2579 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
2580 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
2581 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2582 }
2583
2584 /* set the pointer to the MQD */
2585 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
2586 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2587 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
2588 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
2589 /* set MQD vmid to 0 */
2590 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
2591 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
2592 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
2593
2594 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2595 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
2596 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
2597 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2598 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
2599 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
2600
2601 /* set up the HQD, this is similar to CP_RB0_CNTL */
2602 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
2603 mqd->queue_state.cp_hqd_pq_control &=
2604 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
2605
2606 mqd->queue_state.cp_hqd_pq_control |=
2607 drm_order(rdev->ring[idx].ring_size / 8);
2608 mqd->queue_state.cp_hqd_pq_control |=
2609 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
2610#ifdef __BIG_ENDIAN
2611 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
2612#endif
2613 mqd->queue_state.cp_hqd_pq_control &=
2614 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
2615 mqd->queue_state.cp_hqd_pq_control |=
2616 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
2617 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
2618
2619 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
2620 if (i == 0)
2621 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
2622 else
2623 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
2624 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
2625 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2626 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
2627 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
2628 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
2629
2630 /* set the wb address whether it's enabled or not */
2631 if (i == 0)
2632 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
2633 else
2634 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
2635 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
2636 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
2637 upper_32_bits(wb_gpu_addr) & 0xffff;
2638 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
2639 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
2640 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2641 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
2642
2643 /* enable the doorbell if requested */
2644 if (use_doorbell) {
2645 mqd->queue_state.cp_hqd_pq_doorbell_control =
2646 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2647 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
2648 mqd->queue_state.cp_hqd_pq_doorbell_control |=
2649 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
2650 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2651 mqd->queue_state.cp_hqd_pq_doorbell_control &=
2652 ~(DOORBELL_SOURCE | DOORBELL_HIT);
2653
2654 } else {
2655 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
2656 }
2657 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2658 mqd->queue_state.cp_hqd_pq_doorbell_control);
2659
2660 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2661 rdev->ring[idx].wptr = 0;
2662 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
2663 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2664 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
2665 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
2666
2667 /* set the vmid for the queue */
2668 mqd->queue_state.cp_hqd_vmid = 0;
2669 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
2670
2671 /* activate the queue */
2672 mqd->queue_state.cp_hqd_active = 1;
2673 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
2674
2675 cik_srbm_select(rdev, 0, 0, 0, 0);
2676
2677 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
2678 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2679
2680 rdev->ring[idx].ready = true;
2681 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
2682 if (r)
2683 rdev->ring[idx].ready = false;
2684 }
2685
Alex Deucher841cf442012-12-18 21:47:44 -05002686 return 0;
2687}
2688
Alex Deucher841cf442012-12-18 21:47:44 -05002689static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2690{
2691 cik_cp_gfx_enable(rdev, enable);
2692 cik_cp_compute_enable(rdev, enable);
2693}
2694
Alex Deucher841cf442012-12-18 21:47:44 -05002695static int cik_cp_load_microcode(struct radeon_device *rdev)
2696{
2697 int r;
2698
2699 r = cik_cp_gfx_load_microcode(rdev);
2700 if (r)
2701 return r;
2702 r = cik_cp_compute_load_microcode(rdev);
2703 if (r)
2704 return r;
2705
2706 return 0;
2707}
2708
Alex Deucher841cf442012-12-18 21:47:44 -05002709static void cik_cp_fini(struct radeon_device *rdev)
2710{
2711 cik_cp_gfx_fini(rdev);
2712 cik_cp_compute_fini(rdev);
2713}
2714
Alex Deucher841cf442012-12-18 21:47:44 -05002715static int cik_cp_resume(struct radeon_device *rdev)
2716{
2717 int r;
2718
2719 /* Reset all cp blocks */
2720 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2721 RREG32(GRBM_SOFT_RESET);
2722 mdelay(15);
2723 WREG32(GRBM_SOFT_RESET, 0);
2724 RREG32(GRBM_SOFT_RESET);
2725
2726 r = cik_cp_load_microcode(rdev);
2727 if (r)
2728 return r;
2729
2730 r = cik_cp_gfx_resume(rdev);
2731 if (r)
2732 return r;
2733 r = cik_cp_compute_resume(rdev);
2734 if (r)
2735 return r;
2736
2737 return 0;
2738}
2739
Alex Deucher21a93e12013-04-09 12:47:11 -04002740/*
2741 * sDMA - System DMA
2742 * Starting with CIK, the GPU has new asynchronous
2743 * DMA engines. These engines are used for compute
2744 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2745 * and each one supports 1 ring buffer used for gfx
2746 * and 2 queues used for compute.
2747 *
2748 * The programming model is very similar to the CP
2749 * (ring buffer, IBs, etc.), but sDMA has its own
2750 * packet format that is different from the PM4 format
2751 * used by the CP. sDMA supports copying data, writing
2752 * embedded data, solid fills, and a number of other
2753 * things. It also has support for tiling/detiling of
2754 * buffers.
2755 */
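/* The SDMA_PACKET() macro used throughout the sDMA code below builds a
 * one-dword packet header from an opcode, a sub-opcode and packet-specific
 * extra bits. A sketch of the shape, judging from how the macro is invoked
 * in this file (the exact bit positions live in cikd.h):
 *
 *   header = SDMA_PACKET(op, sub_op, extra);
 *     op     - packet opcode     (e.g. SDMA_OPCODE_WRITE, _COPY, _FENCE)
 *     sub_op - sub-opcode        (e.g. SDMA_WRITE_SUB_OPCODE_LINEAR)
 *     extra  - packet specific   (e.g. the VM id for INDIRECT_BUFFER)
 */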
2756/**
2757 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2758 *
2759 * @rdev: radeon_device pointer
2760 * @ib: IB object to schedule
2761 *
2762 * Schedule an IB in the DMA ring (CIK).
2763 */
2764void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2765 struct radeon_ib *ib)
2766{
2767 struct radeon_ring *ring = &rdev->ring[ib->ring];
2768 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2769
2770 if (rdev->wb.enabled) {
2771 u32 next_rptr = ring->wptr + 5;
2772 while ((next_rptr & 7) != 4)
2773 next_rptr++;
2774 next_rptr += 4;
2775 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2776 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2777 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2778 radeon_ring_write(ring, 1); /* number of DWs to follow */
2779 radeon_ring_write(ring, next_rptr);
2780 }
2781
2782 /* IB packet must end on a 8 DW boundary */
2783 while ((ring->wptr & 7) != 4)
2784 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2785 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2786 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2787 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2788 radeon_ring_write(ring, ib->length_dw);
2789
2790}
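/* The alignment dance above exists because the 4-dword INDIRECT_BUFFER
 * packet must end on an 8-dword boundary, i.e. it has to start at an
 * offset of 4 mod 8 in the ring. A sketch of the accounting, assuming
 * writeback is enabled:
 *
 *   next_rptr = wptr + 5;          the 5-dword WRITE packet comes first
 *   while ((next_rptr & 7) != 4)   pad until the IB packet would start
 *           next_rptr++;           at the right alignment
 *   next_rptr += 4;                then the 4-dword IB packet itself
 *
 * The NOP loop before the INDIRECT_BUFFER packet emits the same amount of
 * padding into the ring.
 */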
2791
2792/**
2793 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2794 *
2795 * @rdev: radeon_device pointer
2796 * @fence: radeon fence object
2797 *
2798 * Add a DMA fence packet to the ring to write
2799 * the fence seq number and DMA trap packet to generate
2800 * an interrupt if needed (CIK).
2801 */
2802void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2803 struct radeon_fence *fence)
2804{
2805 struct radeon_ring *ring = &rdev->ring[fence->ring];
2806 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2807 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2808 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2809 u32 ref_and_mask;
2810
2811 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2812 ref_and_mask = SDMA0;
2813 else
2814 ref_and_mask = SDMA1;
2815
2816 /* write the fence */
2817 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2818 radeon_ring_write(ring, addr & 0xffffffff);
2819 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2820 radeon_ring_write(ring, fence->seq);
2821 /* generate an interrupt */
2822 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2823 /* flush HDP */
2824 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2825 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2826 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2827 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2828 radeon_ring_write(ring, ref_and_mask); /* MASK */
2829 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2830}
2831
2832/**
2833 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2834 *
2835 * @rdev: radeon_device pointer
2836 * @ring: radeon_ring structure holding ring information
2837 * @semaphore: radeon semaphore object
2838 * @emit_wait: wait or signal semaphore
2839 *
2840 * Add a DMA semaphore packet to the ring wait on or signal
2841 * other rings (CIK).
2842 */
2843void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2844 struct radeon_ring *ring,
2845 struct radeon_semaphore *semaphore,
2846 bool emit_wait)
2847{
2848 u64 addr = semaphore->gpu_addr;
2849 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2850
2851 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2852 radeon_ring_write(ring, addr & 0xfffffff8);
2853 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2854}
2855
2856/**
2857 * cik_sdma_gfx_stop - stop the gfx async dma engines
2858 *
2859 * @rdev: radeon_device pointer
2860 *
2861 * Stop the gfx async dma ring buffers (CIK).
2862 */
2863static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2864{
2865 u32 rb_cntl, reg_offset;
2866 int i;
2867
2868 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2869
2870 for (i = 0; i < 2; i++) {
2871 if (i == 0)
2872 reg_offset = SDMA0_REGISTER_OFFSET;
2873 else
2874 reg_offset = SDMA1_REGISTER_OFFSET;
2875 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2876 rb_cntl &= ~SDMA_RB_ENABLE;
2877 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2878 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2879 }
2880}
2881
2882/**
2883 * cik_sdma_rlc_stop - stop the compute async dma engines
2884 *
2885 * @rdev: radeon_device pointer
2886 *
2887 * Stop the compute async dma queues (CIK).
2888 */
2889static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2890{
2891 /* XXX todo */
2892}
2893
2894/**
2895 * cik_sdma_enable - enable/disable the async dma engines
2896 *
2897 * @rdev: radeon_device pointer
2898 * @enable: enable/disable the DMA MEs.
2899 *
2900 * Halt or unhalt the async dma engines (CIK).
2901 */
2902static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2903{
2904 u32 me_cntl, reg_offset;
2905 int i;
2906
2907 for (i = 0; i < 2; i++) {
2908 if (i == 0)
2909 reg_offset = SDMA0_REGISTER_OFFSET;
2910 else
2911 reg_offset = SDMA1_REGISTER_OFFSET;
2912 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2913 if (enable)
2914 me_cntl &= ~SDMA_HALT;
2915 else
2916 me_cntl |= SDMA_HALT;
2917 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2918 }
2919}
2920
2921/**
2922 * cik_sdma_gfx_resume - setup and start the async dma engines
2923 *
2924 * @rdev: radeon_device pointer
2925 *
2926 * Set up the gfx DMA ring buffers and enable them (CIK).
2927 * Returns 0 for success, error for failure.
2928 */
2929static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2930{
2931 struct radeon_ring *ring;
2932 u32 rb_cntl, ib_cntl;
2933 u32 rb_bufsz;
2934 u32 reg_offset, wb_offset;
2935 int i, r;
2936
2937 for (i = 0; i < 2; i++) {
2938 if (i == 0) {
2939 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2940 reg_offset = SDMA0_REGISTER_OFFSET;
2941 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2942 } else {
2943 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2944 reg_offset = SDMA1_REGISTER_OFFSET;
2945 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2946 }
2947
2948 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2949 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2950
2951 /* Set ring buffer size in dwords */
2952 rb_bufsz = drm_order(ring->ring_size / 4);
2953 rb_cntl = rb_bufsz << 1;
2954#ifdef __BIG_ENDIAN
2955 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2956#endif
2957 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2958
2959 /* Initialize the ring buffer's read and write pointers */
2960 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2961 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2962
2963 /* set the wb address whether it's enabled or not */
2964 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2965 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2966 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2967 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2968
2969 if (rdev->wb.enabled)
2970 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2971
2972 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2973 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2974
2975 ring->wptr = 0;
2976 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2977
2978 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2979
2980 /* enable DMA RB */
2981 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2982
2983 ib_cntl = SDMA_IB_ENABLE;
2984#ifdef __BIG_ENDIAN
2985 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2986#endif
2987 /* enable DMA IBs */
2988 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2989
2990 ring->ready = true;
2991
2992 r = radeon_ring_test(rdev, ring->idx, ring);
2993 if (r) {
2994 ring->ready = false;
2995 return r;
2996 }
2997 }
2998
2999 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3000
3001 return 0;
3002}
3003
3004/**
3005 * cik_sdma_rlc_resume - setup and start the async dma engines
3006 *
3007 * @rdev: radeon_device pointer
3008 *
3009 * Set up the compute DMA queues and enable them (CIK).
3010 * Returns 0 for success, error for failure.
3011 */
3012static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3013{
3014 /* XXX todo */
3015 return 0;
3016}
3017
3018/**
3019 * cik_sdma_load_microcode - load the sDMA ME ucode
3020 *
3021 * @rdev: radeon_device pointer
3022 *
3023 * Loads the sDMA0/1 ucode.
3024 * Returns 0 for success, -EINVAL if the ucode is not available.
3025 */
3026static int cik_sdma_load_microcode(struct radeon_device *rdev)
3027{
3028 const __be32 *fw_data;
3029 int i;
3030
3031 if (!rdev->sdma_fw)
3032 return -EINVAL;
3033
3034 /* stop the gfx rings and rlc compute queues */
3035 cik_sdma_gfx_stop(rdev);
3036 cik_sdma_rlc_stop(rdev);
3037
3038 /* halt the MEs */
3039 cik_sdma_enable(rdev, false);
3040
3041 /* sdma0 */
3042 fw_data = (const __be32 *)rdev->sdma_fw->data;
3043 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3044 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3045 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3046 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3047
3048 /* sdma1 */
3049 fw_data = (const __be32 *)rdev->sdma_fw->data;
3050 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3051 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3052 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3053 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3054
3055 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3056 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3057 return 0;
3058}
3059
3060/**
3061 * cik_sdma_resume - setup and start the async dma engines
3062 *
3063 * @rdev: radeon_device pointer
3064 *
3065 * Set up the DMA engines and enable them (CIK).
3066 * Returns 0 for success, error for failure.
3067 */
3068static int cik_sdma_resume(struct radeon_device *rdev)
3069{
3070 int r;
3071
3072 /* Reset dma */
3073 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3074 RREG32(SRBM_SOFT_RESET);
3075 udelay(50);
3076 WREG32(SRBM_SOFT_RESET, 0);
3077 RREG32(SRBM_SOFT_RESET);
3078
3079 r = cik_sdma_load_microcode(rdev);
3080 if (r)
3081 return r;
3082
3083 /* unhalt the MEs */
3084 cik_sdma_enable(rdev, true);
3085
3086 /* start the gfx rings and rlc compute queues */
3087 r = cik_sdma_gfx_resume(rdev);
3088 if (r)
3089 return r;
3090 r = cik_sdma_rlc_resume(rdev);
3091 if (r)
3092 return r;
3093
3094 return 0;
3095}
3096
3097/**
3098 * cik_sdma_fini - tear down the async dma engines
3099 *
3100 * @rdev: radeon_device pointer
3101 *
3102 * Stop the async dma engines and free the rings (CIK).
3103 */
3104static void cik_sdma_fini(struct radeon_device *rdev)
3105{
3106 /* stop the gfx rings and rlc compute queues */
3107 cik_sdma_gfx_stop(rdev);
3108 cik_sdma_rlc_stop(rdev);
3109 /* halt the MEs */
3110 cik_sdma_enable(rdev, false);
3111 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3112 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3113 /* XXX - compute dma queue tear down */
3114}
3115
3116/**
3117 * cik_copy_dma - copy pages using the DMA engine
3118 *
3119 * @rdev: radeon_device pointer
3120 * @src_offset: src GPU address
3121 * @dst_offset: dst GPU address
3122 * @num_gpu_pages: number of GPU pages to xfer
3123 * @fence: radeon fence object
3124 *
3125 * Copy GPU pages using the DMA engine (CIK).
3126 * Used by the radeon ttm implementation to move pages if
3127 * registered as the asic copy callback.
3128 */
3129int cik_copy_dma(struct radeon_device *rdev,
3130 uint64_t src_offset, uint64_t dst_offset,
3131 unsigned num_gpu_pages,
3132 struct radeon_fence **fence)
3133{
3134 struct radeon_semaphore *sem = NULL;
3135 int ring_index = rdev->asic->copy.dma_ring_index;
3136 struct radeon_ring *ring = &rdev->ring[ring_index];
3137 u32 size_in_bytes, cur_size_in_bytes;
3138 int i, num_loops;
3139 int r = 0;
3140
3141 r = radeon_semaphore_create(rdev, &sem);
3142 if (r) {
3143 DRM_ERROR("radeon: moving bo (%d).\n", r);
3144 return r;
3145 }
3146
3147 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3148 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3149 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3150 if (r) {
3151 DRM_ERROR("radeon: moving bo (%d).\n", r);
3152 radeon_semaphore_free(rdev, &sem, NULL);
3153 return r;
3154 }
3155
3156 if (radeon_fence_need_sync(*fence, ring->idx)) {
3157 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3158 ring->idx);
3159 radeon_fence_note_sync(*fence, ring->idx);
3160 } else {
3161 radeon_semaphore_free(rdev, &sem, NULL);
3162 }
3163
3164 for (i = 0; i < num_loops; i++) {
3165 cur_size_in_bytes = size_in_bytes;
3166 if (cur_size_in_bytes > 0x1fffff)
3167 cur_size_in_bytes = 0x1fffff;
3168 size_in_bytes -= cur_size_in_bytes;
3169 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3170 radeon_ring_write(ring, cur_size_in_bytes);
3171 radeon_ring_write(ring, 0); /* src/dst endian swap */
3172 radeon_ring_write(ring, src_offset & 0xffffffff);
3173 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3174 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3175 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3176 src_offset += cur_size_in_bytes;
3177 dst_offset += cur_size_in_bytes;
3178 }
3179
3180 r = radeon_fence_emit(rdev, fence, ring->idx);
3181 if (r) {
3182 radeon_ring_unlock_undo(rdev, ring);
3183 return r;
3184 }
3185
3186 radeon_ring_unlock_commit(rdev, ring);
3187 radeon_semaphore_free(rdev, &sem, *fence);
3188
3189 return r;
3190}
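/* Each sDMA COPY_LINEAR packet above moves at most 0x1fffff bytes (just
 * under 2 MB), so the copy is chopped into num_loops chunks of 7 dwords
 * each, plus up to 14 dwords of semaphore sync and fence overhead, which
 * is exactly what the radeon_ring_lock() call above reserves. As a quick
 * example, assuming 4 KB GPU pages, a 16-page (64 KB) copy fits in a
 * single loop, while a 4 MB copy needs three.
 */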
3191
3192/**
3193 * cik_sdma_ring_test - simple async dma engine test
3194 *
3195 * @rdev: radeon_device pointer
3196 * @ring: radeon_ring structure holding ring information
3197 *
3198 * Test the DMA engine by using it to write a value
3199 * to memory (CIK).
3200 * Returns 0 for success, error for failure.
3201 */
3202int cik_sdma_ring_test(struct radeon_device *rdev,
3203 struct radeon_ring *ring)
3204{
3205 unsigned i;
3206 int r;
3207 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3208 u32 tmp;
3209
3210 if (!ptr) {
3211 DRM_ERROR("invalid vram scratch pointer\n");
3212 return -EINVAL;
3213 }
3214
3215 tmp = 0xCAFEDEAD;
3216 writel(tmp, ptr);
3217
3218 r = radeon_ring_lock(rdev, ring, 4);
3219 if (r) {
3220 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3221 return r;
3222 }
3223 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3224 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3225 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3226 radeon_ring_write(ring, 1); /* number of DWs to follow */
3227 radeon_ring_write(ring, 0xDEADBEEF);
3228 radeon_ring_unlock_commit(rdev, ring);
3229
3230 for (i = 0; i < rdev->usec_timeout; i++) {
3231 tmp = readl(ptr);
3232 if (tmp == 0xDEADBEEF)
3233 break;
3234 DRM_UDELAY(1);
3235 }
3236
3237 if (i < rdev->usec_timeout) {
3238 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3239 } else {
3240 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3241 ring->idx, tmp);
3242 r = -EINVAL;
3243 }
3244 return r;
3245}
3246
3247/**
3248 * cik_sdma_ib_test - test an IB on the DMA engine
3249 *
3250 * @rdev: radeon_device pointer
3251 * @ring: radeon_ring structure holding ring information
3252 *
3253 * Test a simple IB in the DMA ring (CIK).
3254 * Returns 0 on success, error on failure.
3255 */
3256int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3257{
3258 struct radeon_ib ib;
3259 unsigned i;
3260 int r;
3261 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3262 u32 tmp = 0;
3263
3264 if (!ptr) {
3265 DRM_ERROR("invalid vram scratch pointer\n");
3266 return -EINVAL;
3267 }
3268
3269 tmp = 0xCAFEDEAD;
3270 writel(tmp, ptr);
3271
3272 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3273 if (r) {
3274 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3275 return r;
3276 }
3277
3278 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3279 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3280 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3281 ib.ptr[3] = 1;
3282 ib.ptr[4] = 0xDEADBEEF;
3283 ib.length_dw = 5;
3284
3285 r = radeon_ib_schedule(rdev, &ib, NULL);
3286 if (r) {
3287 radeon_ib_free(rdev, &ib);
3288 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3289 return r;
3290 }
3291 r = radeon_fence_wait(ib.fence, false);
3292 if (r) {
3293 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3294 return r;
3295 }
3296 for (i = 0; i < rdev->usec_timeout; i++) {
3297 tmp = readl(ptr);
3298 if (tmp == 0xDEADBEEF)
3299 break;
3300 DRM_UDELAY(1);
3301 }
3302 if (i < rdev->usec_timeout) {
3303 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3304 } else {
3305 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3306 r = -EINVAL;
3307 }
3308 radeon_ib_free(rdev, &ib);
3309 return r;
3310}
3311
Alex Deuchercc066712013-04-09 12:59:51 -04003312
3313static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3314{
3315 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3316 RREG32(GRBM_STATUS));
3317 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3318 RREG32(GRBM_STATUS2));
3319 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3320 RREG32(GRBM_STATUS_SE0));
3321 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3322 RREG32(GRBM_STATUS_SE1));
3323 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3324 RREG32(GRBM_STATUS_SE2));
3325 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3326 RREG32(GRBM_STATUS_SE3));
3327 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3328 RREG32(SRBM_STATUS));
3329 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3330 RREG32(SRBM_STATUS2));
3331 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3332 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3333 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3334 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003335 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3336 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3337 RREG32(CP_STALLED_STAT1));
3338 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3339 RREG32(CP_STALLED_STAT2));
3340 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3341 RREG32(CP_STALLED_STAT3));
3342 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3343 RREG32(CP_CPF_BUSY_STAT));
3344 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3345 RREG32(CP_CPF_STALLED_STAT1));
3346 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3347 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3348 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3349 RREG32(CP_CPC_STALLED_STAT1));
3350 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003351}
3352
Alex Deucher6f2043c2013-04-09 12:43:41 -04003353/**
Alex Deuchercc066712013-04-09 12:59:51 -04003354 * cik_gpu_check_soft_reset - check which blocks are busy
3355 *
3356 * @rdev: radeon_device pointer
3357 *
3358 * Check which blocks are busy and return the relevant reset
3359 * mask to be used by cik_gpu_soft_reset().
3360 * Returns a mask of the blocks to be reset.
3361 */
3362static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3363{
3364 u32 reset_mask = 0;
3365 u32 tmp;
3366
3367 /* GRBM_STATUS */
3368 tmp = RREG32(GRBM_STATUS);
3369 if (tmp & (PA_BUSY | SC_BUSY |
3370 BCI_BUSY | SX_BUSY |
3371 TA_BUSY | VGT_BUSY |
3372 DB_BUSY | CB_BUSY |
3373 GDS_BUSY | SPI_BUSY |
3374 IA_BUSY | IA_BUSY_NO_DMA))
3375 reset_mask |= RADEON_RESET_GFX;
3376
3377 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3378 reset_mask |= RADEON_RESET_CP;
3379
3380 /* GRBM_STATUS2 */
3381 tmp = RREG32(GRBM_STATUS2);
3382 if (tmp & RLC_BUSY)
3383 reset_mask |= RADEON_RESET_RLC;
3384
3385 /* SDMA0_STATUS_REG */
3386 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3387 if (!(tmp & SDMA_IDLE))
3388 reset_mask |= RADEON_RESET_DMA;
3389
3390 /* SDMA1_STATUS_REG */
3391 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3392 if (!(tmp & SDMA_IDLE))
3393 reset_mask |= RADEON_RESET_DMA1;
3394
3395 /* SRBM_STATUS2 */
3396 tmp = RREG32(SRBM_STATUS2);
3397 if (tmp & SDMA_BUSY)
3398 reset_mask |= RADEON_RESET_DMA;
3399
3400 if (tmp & SDMA1_BUSY)
3401 reset_mask |= RADEON_RESET_DMA1;
3402
3403 /* SRBM_STATUS */
3404 tmp = RREG32(SRBM_STATUS);
3405
3406 if (tmp & IH_BUSY)
3407 reset_mask |= RADEON_RESET_IH;
3408
3409 if (tmp & SEM_BUSY)
3410 reset_mask |= RADEON_RESET_SEM;
3411
3412 if (tmp & GRBM_RQ_PENDING)
3413 reset_mask |= RADEON_RESET_GRBM;
3414
3415 if (tmp & VMC_BUSY)
3416 reset_mask |= RADEON_RESET_VMC;
3417
3418 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3419 MCC_BUSY | MCD_BUSY))
3420 reset_mask |= RADEON_RESET_MC;
3421
3422 if (evergreen_is_display_hung(rdev))
3423 reset_mask |= RADEON_RESET_DISPLAY;
3424
3425 /* Skip MC reset as it's most likely not hung, just busy */
3426 if (reset_mask & RADEON_RESET_MC) {
3427 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3428 reset_mask &= ~RADEON_RESET_MC;
3429 }
3430
3431 return reset_mask;
3432}
3433
3434/**
3435 * cik_gpu_soft_reset - soft reset GPU
3436 *
3437 * @rdev: radeon_device pointer
3438 * @reset_mask: mask of which blocks to reset
3439 *
3440 * Soft reset the blocks specified in @reset_mask.
3441 */
3442static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3443{
3444 struct evergreen_mc_save save;
3445 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3446 u32 tmp;
3447
3448 if (reset_mask == 0)
3449 return;
3450
3451 dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask);
3452
3453 cik_print_gpu_status_regs(rdev);
3454 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3455 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3456 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3457 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3458
3459 /* stop the rlc */
3460 cik_rlc_stop(rdev);
3461
3462 /* Disable GFX parsing/prefetching */
3463 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3464
3465 /* Disable MEC parsing/prefetching */
3466 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3467
3468 if (reset_mask & RADEON_RESET_DMA) {
3469 /* sdma0 */
3470 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3471 tmp |= SDMA_HALT;
3472 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3473 }
3474 if (reset_mask & RADEON_RESET_DMA1) {
3475 /* sdma1 */
3476 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3477 tmp |= SDMA_HALT;
3478 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3479 }
3480
3481 evergreen_mc_stop(rdev, &save);
3482 if (evergreen_mc_wait_for_idle(rdev)) {
3483 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3484 }
3485
3486 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3487 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3488
3489 if (reset_mask & RADEON_RESET_CP) {
3490 grbm_soft_reset |= SOFT_RESET_CP;
3491
3492 srbm_soft_reset |= SOFT_RESET_GRBM;
3493 }
3494
3495 if (reset_mask & RADEON_RESET_DMA)
3496 srbm_soft_reset |= SOFT_RESET_SDMA;
3497
3498 if (reset_mask & RADEON_RESET_DMA1)
3499 srbm_soft_reset |= SOFT_RESET_SDMA1;
3500
3501 if (reset_mask & RADEON_RESET_DISPLAY)
3502 srbm_soft_reset |= SOFT_RESET_DC;
3503
3504 if (reset_mask & RADEON_RESET_RLC)
3505 grbm_soft_reset |= SOFT_RESET_RLC;
3506
3507 if (reset_mask & RADEON_RESET_SEM)
3508 srbm_soft_reset |= SOFT_RESET_SEM;
3509
3510 if (reset_mask & RADEON_RESET_IH)
3511 srbm_soft_reset |= SOFT_RESET_IH;
3512
3513 if (reset_mask & RADEON_RESET_GRBM)
3514 srbm_soft_reset |= SOFT_RESET_GRBM;
3515
3516 if (reset_mask & RADEON_RESET_VMC)
3517 srbm_soft_reset |= SOFT_RESET_VMC;
3518
3519 if (!(rdev->flags & RADEON_IS_IGP)) {
3520 if (reset_mask & RADEON_RESET_MC)
3521 srbm_soft_reset |= SOFT_RESET_MC;
3522 }
3523
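	/* Reset sequence used for both GRBM and SRBM below: assert the
	 * per-block reset bits, read the register back (assumed to act as a
	 * posting read), wait ~50us, then deassert and read back again.
	 */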
3524 if (grbm_soft_reset) {
3525 tmp = RREG32(GRBM_SOFT_RESET);
3526 tmp |= grbm_soft_reset;
3527 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3528 WREG32(GRBM_SOFT_RESET, tmp);
3529 tmp = RREG32(GRBM_SOFT_RESET);
3530
3531 udelay(50);
3532
3533 tmp &= ~grbm_soft_reset;
3534 WREG32(GRBM_SOFT_RESET, tmp);
3535 tmp = RREG32(GRBM_SOFT_RESET);
3536 }
3537
3538 if (srbm_soft_reset) {
3539 tmp = RREG32(SRBM_SOFT_RESET);
3540 tmp |= srbm_soft_reset;
3541 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3542 WREG32(SRBM_SOFT_RESET, tmp);
3543 tmp = RREG32(SRBM_SOFT_RESET);
3544
3545 udelay(50);
3546
3547 tmp &= ~srbm_soft_reset;
3548 WREG32(SRBM_SOFT_RESET, tmp);
3549 tmp = RREG32(SRBM_SOFT_RESET);
3550 }
3551
3552 /* Wait a little for things to settle down */
3553 udelay(50);
3554
3555 evergreen_mc_resume(rdev, &save);
3556 udelay(50);
3557
3558 cik_print_gpu_status_regs(rdev);
3559}
3560
3561/**
3562 * cik_asic_reset - soft reset GPU
3563 *
3564 * @rdev: radeon_device pointer
3565 *
3566 * Look up which blocks are hung and attempt
3567 * to reset them.
3568 * Returns 0 for success.
3569 */
3570int cik_asic_reset(struct radeon_device *rdev)
3571{
3572 u32 reset_mask;
3573
3574 reset_mask = cik_gpu_check_soft_reset(rdev);
3575
3576 if (reset_mask)
3577 r600_set_bios_scratch_engine_hung(rdev, true);
3578
3579 cik_gpu_soft_reset(rdev, reset_mask);
3580
3581 reset_mask = cik_gpu_check_soft_reset(rdev);
3582
3583 if (!reset_mask)
3584 r600_set_bios_scratch_engine_hung(rdev, false);
3585
3586 return 0;
3587}
3588
3589/**
3590 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04003591 *
3592 * @rdev: radeon_device pointer
3593 * @ring: radeon_ring structure holding ring information
3594 *
3595 * Check if the 3D engine is locked up (CIK).
3596 * Returns true if the engine is locked, false if not.
3597 */
Alex Deuchercc066712013-04-09 12:59:51 -04003598bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04003599{
Alex Deuchercc066712013-04-09 12:59:51 -04003600 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04003601
Alex Deuchercc066712013-04-09 12:59:51 -04003602 if (!(reset_mask & (RADEON_RESET_GFX |
3603 RADEON_RESET_COMPUTE |
3604 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04003605 radeon_ring_lockup_update(ring);
3606 return false;
3607 }
3608 /* force CP activities */
3609 radeon_ring_force_activity(rdev, ring);
3610 return radeon_ring_test_lockup(rdev, ring);
3611}
3612
3613/**
Alex Deucher21a93e12013-04-09 12:47:11 -04003614 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3615 *
3616 * @rdev: radeon_device pointer
3617 * @ring: radeon_ring structure holding ring information
3618 *
3619 * Check if the async DMA engine is locked up (CIK).
3620 * Returns true if the engine appears to be locked up, false if not.
3621 */
3622bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3623{
Alex Deuchercc066712013-04-09 12:59:51 -04003624 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3625 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04003626
3627 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04003628 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04003629 else
Alex Deuchercc066712013-04-09 12:59:51 -04003630 mask = RADEON_RESET_DMA1;
3631
3632 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04003633 radeon_ring_lockup_update(ring);
3634 return false;
3635 }
3636 /* force ring activities */
3637 radeon_ring_force_activity(rdev, ring);
3638 return radeon_ring_test_lockup(rdev, ring);
3639}
3640
Alex Deucher1c491652013-04-09 12:45:26 -04003641/* MC */
3642/**
3643 * cik_mc_program - program the GPU memory controller
3644 *
3645 * @rdev: radeon_device pointer
3646 *
3647 * Set the location of vram, gart, and AGP in the GPU's
3648 * physical address space (CIK).
3649 */
3650static void cik_mc_program(struct radeon_device *rdev)
3651{
3652 struct evergreen_mc_save save;
3653 u32 tmp;
3654 int i, j;
3655
3656 /* Initialize HDP */
3657 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3658 WREG32((0x2c14 + j), 0x00000000);
3659 WREG32((0x2c18 + j), 0x00000000);
3660 WREG32((0x2c1c + j), 0x00000000);
3661 WREG32((0x2c20 + j), 0x00000000);
3662 WREG32((0x2c24 + j), 0x00000000);
3663 }
3664 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3665
3666 evergreen_mc_stop(rdev, &save);
3667 if (radeon_mc_wait_for_idle(rdev)) {
3668 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3669 }
3670 /* Lockout access through VGA aperture*/
3671 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3672 /* Update configuration */
3673 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3674 rdev->mc.vram_start >> 12);
3675 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3676 rdev->mc.vram_end >> 12);
3677 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3678 rdev->vram_scratch.gpu_addr >> 12);
3679 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3680 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3681 WREG32(MC_VM_FB_LOCATION, tmp);
3682 /* XXX double check these! */
3683 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3684 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3685 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3686 WREG32(MC_VM_AGP_BASE, 0);
3687 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3688 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3689 if (radeon_mc_wait_for_idle(rdev)) {
3690 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3691 }
3692 evergreen_mc_resume(rdev, &save);
3693 /* we need to own VRAM, so turn off the VGA renderer here
3694 * to stop it from overwriting our objects */
3695 rv515_vga_render_disable(rdev);
3696}
3697
3698/**
3699 * cik_mc_init - initialize the memory controller driver params
3700 *
3701 * @rdev: radeon_device pointer
3702 *
3703 * Look up the amount of vram, vram width, and decide how to place
3704 * vram and gart within the GPU's physical address space (CIK).
3705 * Returns 0 for success.
3706 */
3707static int cik_mc_init(struct radeon_device *rdev)
3708{
3709 u32 tmp;
3710 int chansize, numchan;
3711
3712 /* Get VRAM information */
3713 rdev->mc.vram_is_ddr = true;
3714 tmp = RREG32(MC_ARB_RAMCFG);
3715 if (tmp & CHANSIZE_MASK) {
3716 chansize = 64;
3717 } else {
3718 chansize = 32;
3719 }
3720 tmp = RREG32(MC_SHARED_CHMAP);
3721 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3722 case 0:
3723 default:
3724 numchan = 1;
3725 break;
3726 case 1:
3727 numchan = 2;
3728 break;
3729 case 2:
3730 numchan = 4;
3731 break;
3732 case 3:
3733 numchan = 8;
3734 break;
3735 case 4:
3736 numchan = 3;
3737 break;
3738 case 5:
3739 numchan = 6;
3740 break;
3741 case 6:
3742 numchan = 10;
3743 break;
3744 case 7:
3745 numchan = 12;
3746 break;
3747 case 8:
3748 numchan = 16;
3749 break;
3750 }
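	/* effective memory bus width is channels times channel size,
	 * e.g. 4 channels x 64 bits gives a 256-bit interface
	 */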
3751 rdev->mc.vram_width = numchan * chansize;
3752 /* Could the aperture size report 0? */
3753 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3754 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3755 /* size in MB; CONFIG_MEMSIZE reports megabytes on CIK as on SI */
3756 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3757 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3758 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3759 si_vram_gtt_location(rdev, &rdev->mc);
3760 radeon_update_bandwidth_info(rdev);
3761
3762 return 0;
3763}
3764
3765/*
3766 * GART
3767 * VMID 0 covers the physical GPU address space as used by the kernel.
3768 * VMIDs 1-15 are used for userspace clients and are handled
3769 * by the radeon vm/hsa code.
3770 */
3771/**
3772 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3773 *
3774 * @rdev: radeon_device pointer
3775 *
3776 * Flush the TLB for the VMID 0 page table (CIK).
3777 */
3778void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3779{
3780 /* flush hdp cache */
3781 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3782
3783 /* bits 0-15 are the VM contexts0-15 */
3784 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3785}
3786
3787/**
3788 * cik_pcie_gart_enable - gart enable
3789 *
3790 * @rdev: radeon_device pointer
3791 *
3792 * This sets up the TLBs, programs the page tables for VMID0,
3793 * sets up the hw for VMIDs 1-15 which are allocated on
3794 * demand, and sets up the global locations for the LDS, GDS,
3795 * and GPUVM for FSA64 clients (CIK).
3796 * Returns 0 for success, errors for failure.
3797 */
3798static int cik_pcie_gart_enable(struct radeon_device *rdev)
3799{
3800 int r, i;
3801
3802 if (rdev->gart.robj == NULL) {
3803 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3804 return -EINVAL;
3805 }
3806 r = radeon_gart_table_vram_pin(rdev);
3807 if (r)
3808 return r;
3809 radeon_gart_restore(rdev);
3810 /* Setup TLB control */
3811 WREG32(MC_VM_MX_L1_TLB_CNTL,
3812 (0xA << 7) |
3813 ENABLE_L1_TLB |
3814 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3815 ENABLE_ADVANCED_DRIVER_MODEL |
3816 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3817 /* Setup L2 cache */
3818 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3819 ENABLE_L2_FRAGMENT_PROCESSING |
3820 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3821 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3822 EFFECTIVE_L2_QUEUE_SIZE(7) |
3823 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3824 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3825 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3826 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3827 /* setup context0 */
3828 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3829 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3830 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3831 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3832 (u32)(rdev->dummy_page.addr >> 12));
3833 WREG32(VM_CONTEXT0_CNTL2, 0);
3834 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3835 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3836
3837 WREG32(0x15D4, 0);
3838 WREG32(0x15D8, 0);
3839 WREG32(0x15DC, 0);
3840
3841 /* empty context1-15 */
3842 /* FIXME: start with 4GB; once we are using a two-level page table,
3843 * switch to the full VM address space
3844 */
3845 /* set vm size, must be a multiple of 4 */
3846 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3847 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
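	/* The page table base registers for VMIDs 1-7 follow VM_CONTEXT0,
	 * while VMIDs 8-15 sit in a second block starting at VM_CONTEXT8.
	 * Point them all at the GART table until real per-VM tables are bound.
	 */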
3848 for (i = 1; i < 16; i++) {
3849 if (i < 8)
3850 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3851 rdev->gart.table_addr >> 12);
3852 else
3853 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3854 rdev->gart.table_addr >> 12);
3855 }
3856
3857 /* enable context1-15 */
3858 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3859 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04003860 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04003861 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04003862 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3863 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3864 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3865 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3866 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3867 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3868 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3869 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3870 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3871 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3872 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3873 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04003874
3875 /* TC cache setup ??? */
3876 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3877 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3878 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3879
3880 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3881 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3882 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3883 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3884 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3885
3886 WREG32(TC_CFG_L1_VOLATILE, 0);
3887 WREG32(TC_CFG_L2_VOLATILE, 0);
3888
3889 if (rdev->family == CHIP_KAVERI) {
3890 u32 tmp = RREG32(CHUB_CONTROL);
3891 tmp &= ~BYPASS_VM;
3892 WREG32(CHUB_CONTROL, tmp);
3893 }
3894
3895 /* XXX SH_MEM regs */
3896 /* where to put LDS, scratch, GPUVM in FSA64 space */
3897 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05003898 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04003899 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04003900 WREG32(SH_MEM_CONFIG, 0);
3901 WREG32(SH_MEM_APE1_BASE, 1);
3902 WREG32(SH_MEM_APE1_LIMIT, 0);
3903 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04003904 /* SDMA GFX */
3905 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3906 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3907 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3908 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3909 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04003910 }
Alex Deucherb556b122013-01-29 10:44:22 -05003911 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04003912
3913 cik_pcie_gart_tlb_flush(rdev);
3914 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3915 (unsigned)(rdev->mc.gtt_size >> 20),
3916 (unsigned long long)rdev->gart.table_addr);
3917 rdev->gart.ready = true;
3918 return 0;
3919}
3920
3921/**
3922 * cik_pcie_gart_disable - gart disable
3923 *
3924 * @rdev: radeon_device pointer
3925 *
3926 * This disables all VM page tables (CIK).
3927 */
3928static void cik_pcie_gart_disable(struct radeon_device *rdev)
3929{
3930 /* Disable all tables */
3931 WREG32(VM_CONTEXT0_CNTL, 0);
3932 WREG32(VM_CONTEXT1_CNTL, 0);
3933 /* Setup TLB control */
3934 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3935 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3936 /* Setup L2 cache */
3937 WREG32(VM_L2_CNTL,
3938 ENABLE_L2_FRAGMENT_PROCESSING |
3939 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3940 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3941 EFFECTIVE_L2_QUEUE_SIZE(7) |
3942 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3943 WREG32(VM_L2_CNTL2, 0);
3944 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3945 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3946 radeon_gart_table_vram_unpin(rdev);
3947}
3948
3949/**
3950 * cik_pcie_gart_fini - vm fini callback
3951 *
3952 * @rdev: radeon_device pointer
3953 *
3954 * Tears down the driver GART/VM setup (CIK).
3955 */
3956static void cik_pcie_gart_fini(struct radeon_device *rdev)
3957{
3958 cik_pcie_gart_disable(rdev);
3959 radeon_gart_table_vram_free(rdev);
3960 radeon_gart_fini(rdev);
3961}
3962
3963/* vm parser */
3964/**
3965 * cik_ib_parse - vm ib_parse callback
3966 *
3967 * @rdev: radeon_device pointer
3968 * @ib: indirect buffer pointer
3969 *
3970 * CIK uses hw IB checking so this is a nop (CIK).
3971 */
3972int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3973{
3974 return 0;
3975}
3976
3977/*
3978 * vm
3979 * VMID 0 covers the physical GPU address space as used by the kernel.
3980 * VMIDs 1-15 are used for userspace clients and are handled
3981 * by the radeon vm/hsa code.
3982 */
3983/**
3984 * cik_vm_init - cik vm init callback
3985 *
3986 * @rdev: radeon_device pointer
3987 *
3988 * Inits cik specific vm parameters (number of VMs, base of vram for
3989 * VMIDs 1-15) (CIK).
3990 * Returns 0 for success.
3991 */
3992int cik_vm_init(struct radeon_device *rdev)
3993{
3994 /* number of VMs */
3995 rdev->vm_manager.nvm = 16;
3996 /* base offset of vram pages */
3997 if (rdev->flags & RADEON_IS_IGP) {
3998 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3999 tmp <<= 22;
4000 rdev->vm_manager.vram_base_offset = tmp;
4001 } else
4002 rdev->vm_manager.vram_base_offset = 0;
4003
4004 return 0;
4005}
4006
4007/**
4008 * cik_vm_fini - cik vm fini callback
4009 *
4010 * @rdev: radeon_device pointer
4011 *
4012 * Tear down any asic specific VM setup (CIK).
4013 */
4014void cik_vm_fini(struct radeon_device *rdev)
4015{
4016}
4017
Alex Deucherf96ab482012-08-31 10:37:47 -04004018/**
4019 * cik_vm_flush - cik vm flush using the CP
4020 *
4021 * @rdev: radeon_device pointer
4022 *
4023 * Update the page table base and flush the VM TLB
4024 * using the CP (CIK).
4025 */
4026void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4027{
4028 struct radeon_ring *ring = &rdev->ring[ridx];
4029
4030 if (vm == NULL)
4031 return;
4032
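	/* Each WRITE_DATA packet below carries a control dword, the target
	 * register's dword offset (hence the >> 2), a zero for the high
	 * address bits, and then the payload dword(s).
	 */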
4033 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4034 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4035 WRITE_DATA_DST_SEL(0)));
4036 if (vm->id < 8) {
4037 radeon_ring_write(ring,
4038 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4039 } else {
4040 radeon_ring_write(ring,
4041 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4042 }
4043 radeon_ring_write(ring, 0);
4044 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4045
4046 /* update SH_MEM_* regs */
4047 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4048 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4049 WRITE_DATA_DST_SEL(0)));
4050 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4051 radeon_ring_write(ring, 0);
4052 radeon_ring_write(ring, VMID(vm->id));
4053
4054 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4055 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4056 WRITE_DATA_DST_SEL(0)));
4057 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4058 radeon_ring_write(ring, 0);
4059
4060 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4061 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4062 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4063 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4064
4065 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4066 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4067 WRITE_DATA_DST_SEL(0)));
4068 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4069 radeon_ring_write(ring, 0);
4070 radeon_ring_write(ring, VMID(0));
4071
4072 /* HDP flush */
4073 /* We should be using the WAIT_REG_MEM packet here like in
4074 * cik_fence_ring_emit(), but it causes the CP to hang in this
4075 * context...
4076 */
4077 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4078 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4079 WRITE_DATA_DST_SEL(0)));
4080 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4081 radeon_ring_write(ring, 0);
4082 radeon_ring_write(ring, 0);
4083
4084 /* bits 0-15 are the VM contexts0-15 */
4085 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4086 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4087 WRITE_DATA_DST_SEL(0)));
4088 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4089 radeon_ring_write(ring, 0);
4090 radeon_ring_write(ring, 1 << vm->id);
4091
Alex Deucherb07fdd32013-04-11 09:36:17 -04004092 /* compute doesn't have PFP */
4093 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4094 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4095 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4096 radeon_ring_write(ring, 0x0);
4097 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004098}
4099
Alex Deucher605de6b2012-10-22 13:04:03 -04004100/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004101 * cik_vm_set_page - update the page tables using CP or sDMA
4102 *
4103 * @rdev: radeon_device pointer
4104 * @ib: indirect buffer to fill with commands
4105 * @pe: addr of the page entry
4106 * @addr: dst addr to write into pe
4107 * @count: number of page entries to update
4108 * @incr: increase next addr by incr bytes
4109 * @flags: access flags
4110 *
4111 * Update the page tables using CP or sDMA (CIK).
4112 */
4113void cik_vm_set_page(struct radeon_device *rdev,
4114 struct radeon_ib *ib,
4115 uint64_t pe,
4116 uint64_t addr, unsigned count,
4117 uint32_t incr, uint32_t flags)
4118{
4119 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4120 uint64_t value;
4121 unsigned ndw;
4122
4123 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4124 /* CP */
4125 while (count) {
4126 ndw = 2 + count * 2;
4127 if (ndw > 0x3FFE)
4128 ndw = 0x3FFE;
4129
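			/* WRITE_DATA has a limited dword count field, so emit
			 * at most 0x3FFE dwords (header plus lo/hi PTE pairs)
			 * per packet and loop until all entries are written
			 */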
4130 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4131 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4132 WRITE_DATA_DST_SEL(1));
4133 ib->ptr[ib->length_dw++] = pe;
4134 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4135 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4136 if (flags & RADEON_VM_PAGE_SYSTEM) {
4137 value = radeon_vm_map_gart(rdev, addr);
4138 value &= 0xFFFFFFFFFFFFF000ULL;
4139 } else if (flags & RADEON_VM_PAGE_VALID) {
4140 value = addr;
4141 } else {
4142 value = 0;
4143 }
4144 addr += incr;
4145 value |= r600_flags;
4146 ib->ptr[ib->length_dw++] = value;
4147 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4148 }
4149 }
4150 } else {
4151 /* DMA */
4152 if (flags & RADEON_VM_PAGE_SYSTEM) {
4153 while (count) {
4154 ndw = count * 2;
4155 if (ndw > 0xFFFFE)
4156 ndw = 0xFFFFE;
4157
4158 /* for non-physically contiguous pages (system) */
4159 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4160 ib->ptr[ib->length_dw++] = pe;
4161 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4162 ib->ptr[ib->length_dw++] = ndw;
4163 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4164 if (flags & RADEON_VM_PAGE_SYSTEM) {
4165 value = radeon_vm_map_gart(rdev, addr);
4166 value &= 0xFFFFFFFFFFFFF000ULL;
4167 } else if (flags & RADEON_VM_PAGE_VALID) {
4168 value = addr;
4169 } else {
4170 value = 0;
4171 }
4172 addr += incr;
4173 value |= r600_flags;
4174 ib->ptr[ib->length_dw++] = value;
4175 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4176 }
4177 }
4178 } else {
4179 while (count) {
4180 ndw = count;
4181 if (ndw > 0x7FFFF)
4182 ndw = 0x7FFFF;
4183
4184 if (flags & RADEON_VM_PAGE_VALID)
4185 value = addr;
4186 else
4187 value = 0;
4188 /* for physically contiguous pages (vram) */
4189 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4190 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4191 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4192 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4193 ib->ptr[ib->length_dw++] = 0;
4194 ib->ptr[ib->length_dw++] = value; /* value */
4195 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4196 ib->ptr[ib->length_dw++] = incr; /* increment size */
4197 ib->ptr[ib->length_dw++] = 0;
4198 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4199 pe += ndw * 8;
4200 addr += ndw * incr;
4201 count -= ndw;
4202 }
4203 }
4204 while (ib->length_dw & 0x7)
4205 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4206 }
4207}
4208
4209/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004210 * cik_dma_vm_flush - cik vm flush using sDMA
4211 *
4212 * @rdev: radeon_device pointer
4213 *
4214 * Update the page table base and flush the VM TLB
4215 * using sDMA (CIK).
4216 */
4217void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4218{
4219 struct radeon_ring *ring = &rdev->ring[ridx];
4220 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4221 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4222 u32 ref_and_mask;
4223
4224 if (vm == NULL)
4225 return;
4226
4227 if (ridx == R600_RING_TYPE_DMA_INDEX)
4228 ref_and_mask = SDMA0;
4229 else
4230 ref_and_mask = SDMA1;
4231
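	/* SRBM_WRITE packets below: the 0xf000 in the header is assumed to be
	 * the byte-enable mask (write all four bytes of the register); each
	 * packet then takes the register dword offset followed by the value.
	 * ref_and_mask above picks this engine's bit for the HDP flush poll.
	 */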
4232 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4233 if (vm->id < 8) {
4234 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4235 } else {
4236 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4237 }
4238 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4239
4240 /* update SH_MEM_* regs */
4241 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4242 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4243 radeon_ring_write(ring, VMID(vm->id));
4244
4245 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4246 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4247 radeon_ring_write(ring, 0);
4248
4249 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4250 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4251 radeon_ring_write(ring, 0);
4252
4253 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4254 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4255 radeon_ring_write(ring, 1);
4256
4257 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4258 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4259 radeon_ring_write(ring, 0);
4260
4261 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4262 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4263 radeon_ring_write(ring, VMID(0));
4264
4265 /* flush HDP */
4266 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4267 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4268 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4269 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4270 radeon_ring_write(ring, ref_and_mask); /* MASK */
4271 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4272
4273 /* flush TLB */
4274 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4275 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4276 radeon_ring_write(ring, 1 << vm->id);
4277}
4278
Alex Deucherf6796ca2012-11-09 10:44:08 -05004279/*
4280 * RLC
4281 * The RLC is a multi-purpose microengine that handles a
4282 * variety of functions, the most important of which is
4283 * the interrupt controller.
4284 */
4285/**
4286 * cik_rlc_stop - stop the RLC ME
4287 *
4288 * @rdev: radeon_device pointer
4289 *
4290 * Halt the RLC ME (MicroEngine) (CIK).
4291 */
4292static void cik_rlc_stop(struct radeon_device *rdev)
4293{
4294 int i, j, k;
4295 u32 mask, tmp;
4296
4297 tmp = RREG32(CP_INT_CNTL_RING0);
4298 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4299 WREG32(CP_INT_CNTL_RING0, tmp);
4300
4301 RREG32(CB_CGTT_SCLK_CTRL);
4302 RREG32(CB_CGTT_SCLK_CTRL);
4303 RREG32(CB_CGTT_SCLK_CTRL);
4304 RREG32(CB_CGTT_SCLK_CTRL);
4305
4306 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4307 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4308
4309 WREG32(RLC_CNTL, 0);
4310
4311 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4312 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4313 cik_select_se_sh(rdev, i, j);
4314 for (k = 0; k < rdev->usec_timeout; k++) {
4315 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4316 break;
4317 udelay(1);
4318 }
4319 }
4320 }
4321 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4322
4323 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4324 for (k = 0; k < rdev->usec_timeout; k++) {
4325 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4326 break;
4327 udelay(1);
4328 }
4329}
4330
4331/**
4332 * cik_rlc_start - start the RLC ME
4333 *
4334 * @rdev: radeon_device pointer
4335 *
4336 * Unhalt the RLC ME (MicroEngine) (CIK).
4337 */
4338static void cik_rlc_start(struct radeon_device *rdev)
4339{
4340 u32 tmp;
4341
4342 WREG32(RLC_CNTL, RLC_ENABLE);
4343
4344 tmp = RREG32(CP_INT_CNTL_RING0);
4345 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4346 WREG32(CP_INT_CNTL_RING0, tmp);
4347
4348 udelay(50);
4349}
4350
4351/**
4352 * cik_rlc_resume - setup the RLC hw
4353 *
4354 * @rdev: radeon_device pointer
4355 *
4356 * Initialize the RLC registers, load the ucode,
4357 * and start the RLC (CIK).
4358 * Returns 0 for success, -EINVAL if the ucode is not available.
4359 */
4360static int cik_rlc_resume(struct radeon_device *rdev)
4361{
4362 u32 i, size;
4363 u32 clear_state_info[3];
4364 const __be32 *fw_data;
4365
4366 if (!rdev->rlc_fw)
4367 return -EINVAL;
4368
4369 switch (rdev->family) {
4370 case CHIP_BONAIRE:
4371 default:
4372 size = BONAIRE_RLC_UCODE_SIZE;
4373 break;
4374 case CHIP_KAVERI:
4375 size = KV_RLC_UCODE_SIZE;
4376 break;
4377 case CHIP_KABINI:
4378 size = KB_RLC_UCODE_SIZE;
4379 break;
4380 }
4381
4382 cik_rlc_stop(rdev);
4383
4384 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4385 RREG32(GRBM_SOFT_RESET);
4386 udelay(50);
4387 WREG32(GRBM_SOFT_RESET, 0);
4388 RREG32(GRBM_SOFT_RESET);
4389 udelay(50);
4390
4391 WREG32(RLC_LB_CNTR_INIT, 0);
4392 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4393
4394 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4395 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4396 WREG32(RLC_LB_PARAMS, 0x00600408);
4397 WREG32(RLC_LB_CNTL, 0x80000004);
4398
4399 WREG32(RLC_MC_CNTL, 0);
4400 WREG32(RLC_UCODE_CNTL, 0);
4401
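	/* the RLC ucode is stored big-endian in the firmware blob; stream it
	 * into RLC_GPM_UCODE_DATA one dword at a time with the write address
	 * reset to 0 before and after the upload
	 */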
4402 fw_data = (const __be32 *)rdev->rlc_fw->data;
4403 WREG32(RLC_GPM_UCODE_ADDR, 0);
4404 for (i = 0; i < size; i++)
4405 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4406 WREG32(RLC_GPM_UCODE_ADDR, 0);
4407
4408 /* XXX */
4409 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4410 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4411 clear_state_info[2] = 0;//cik_default_size;
4412 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4413 for (i = 0; i < 3; i++)
4414 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4415 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4416
4417 cik_rlc_start(rdev);
4418
4419 return 0;
4420}
Alex Deuchera59781b2012-11-09 10:45:57 -05004421
4422/*
4423 * Interrupts
4424 * Starting with r6xx, interrupts are handled via a ring buffer.
4425 * Ring buffers are areas of GPU accessible memory that the GPU
4426 * writes interrupt vectors into and the host reads vectors out of.
4427 * There is a rptr (read pointer) that determines where the
4428 * host is currently reading, and a wptr (write pointer)
4429 * which determines where the GPU has written. When the
4430 * pointers are equal, the ring is idle. When the GPU
4431 * writes vectors to the ring buffer, it increments the
4432 * wptr. When there is an interrupt, the host then starts
4433 * fetching vectors and processing them until the pointers are
4434 * equal again at which point it updates the rptr.
4435 */
4436
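/*
 * Roughly, the consumer side of the IH ring (implemented by
 * cik_irq_process() below) looks like:
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		handle_entry(&ih.ring[rptr / 4]);	// one 16 byte vector
 *		rptr = (rptr + 16) & ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);			// tell the GPU we caught up
 */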
4437/**
4438 * cik_enable_interrupts - Enable the interrupt ring buffer
4439 *
4440 * @rdev: radeon_device pointer
4441 *
4442 * Enable the interrupt ring buffer (CIK).
4443 */
4444static void cik_enable_interrupts(struct radeon_device *rdev)
4445{
4446 u32 ih_cntl = RREG32(IH_CNTL);
4447 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4448
4449 ih_cntl |= ENABLE_INTR;
4450 ih_rb_cntl |= IH_RB_ENABLE;
4451 WREG32(IH_CNTL, ih_cntl);
4452 WREG32(IH_RB_CNTL, ih_rb_cntl);
4453 rdev->ih.enabled = true;
4454}
4455
4456/**
4457 * cik_disable_interrupts - Disable the interrupt ring buffer
4458 *
4459 * @rdev: radeon_device pointer
4460 *
4461 * Disable the interrupt ring buffer (CIK).
4462 */
4463static void cik_disable_interrupts(struct radeon_device *rdev)
4464{
4465 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4466 u32 ih_cntl = RREG32(IH_CNTL);
4467
4468 ih_rb_cntl &= ~IH_RB_ENABLE;
4469 ih_cntl &= ~ENABLE_INTR;
4470 WREG32(IH_RB_CNTL, ih_rb_cntl);
4471 WREG32(IH_CNTL, ih_cntl);
4472 /* set rptr, wptr to 0 */
4473 WREG32(IH_RB_RPTR, 0);
4474 WREG32(IH_RB_WPTR, 0);
4475 rdev->ih.enabled = false;
4476 rdev->ih.rptr = 0;
4477}
4478
4479/**
4480 * cik_disable_interrupt_state - Disable all interrupt sources
4481 *
4482 * @rdev: radeon_device pointer
4483 *
4484 * Clear all interrupt enable bits used by the driver (CIK).
4485 */
4486static void cik_disable_interrupt_state(struct radeon_device *rdev)
4487{
4488 u32 tmp;
4489
4490 /* gfx ring */
4491 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004492 /* sdma */
4493 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4494 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4495 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4496 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004497 /* compute queues */
4498 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4499 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4500 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4501 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4502 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4503 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4504 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4505 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4506 /* grbm */
4507 WREG32(GRBM_INT_CNTL, 0);
4508 /* vline/vblank, etc. */
4509 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4510 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4511 if (rdev->num_crtc >= 4) {
4512 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4513 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4514 }
4515 if (rdev->num_crtc >= 6) {
4516 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4517 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4518 }
4519
4520 /* dac hotplug */
4521 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4522
4523 /* digital hotplug */
4524 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4525 WREG32(DC_HPD1_INT_CONTROL, tmp);
4526 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4527 WREG32(DC_HPD2_INT_CONTROL, tmp);
4528 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4529 WREG32(DC_HPD3_INT_CONTROL, tmp);
4530 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4531 WREG32(DC_HPD4_INT_CONTROL, tmp);
4532 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4533 WREG32(DC_HPD5_INT_CONTROL, tmp);
4534 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4535 WREG32(DC_HPD6_INT_CONTROL, tmp);
4536
4537}
4538
4539/**
4540 * cik_irq_init - init and enable the interrupt ring
4541 *
4542 * @rdev: radeon_device pointer
4543 *
4544 * Allocate a ring buffer for the interrupt controller,
4545 * enable the RLC, disable interrupts, set up the IH
4546 * ring buffer and enable it (CIK).
4547 * Called at device load and resume.
4548 * Returns 0 for success, errors for failure.
4549 */
4550static int cik_irq_init(struct radeon_device *rdev)
4551{
4552 int ret = 0;
4553 int rb_bufsz;
4554 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4555
4556 /* allocate ring */
4557 ret = r600_ih_ring_alloc(rdev);
4558 if (ret)
4559 return ret;
4560
4561 /* disable irqs */
4562 cik_disable_interrupts(rdev);
4563
4564 /* init rlc */
4565 ret = cik_rlc_resume(rdev);
4566 if (ret) {
4567 r600_ih_ring_fini(rdev);
4568 return ret;
4569 }
4570
4571 /* setup interrupt control */
4572 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4573 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4574 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4575 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4576 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4577 */
4578 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4579 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4580 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4581 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4582
4583 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
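	/* ring_size is in bytes; the ring size field in IH_RB_CNTL is assumed
	 * to want log2 of the size in dwords, hence the divide by 4 before
	 * drm_order()
	 */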
4584 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4585
4586 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4587 IH_WPTR_OVERFLOW_CLEAR |
4588 (rb_bufsz << 1));
4589
4590 if (rdev->wb.enabled)
4591 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4592
4593 /* set the writeback address whether it's enabled or not */
4594 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4595 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4596
4597 WREG32(IH_RB_CNTL, ih_rb_cntl);
4598
4599 /* set rptr, wptr to 0 */
4600 WREG32(IH_RB_RPTR, 0);
4601 WREG32(IH_RB_WPTR, 0);
4602
4603 /* Default settings for IH_CNTL (disabled at first) */
4604 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4605 /* RPTR_REARM only works if msi's are enabled */
4606 if (rdev->msi_enabled)
4607 ih_cntl |= RPTR_REARM;
4608 WREG32(IH_CNTL, ih_cntl);
4609
4610 /* force the active interrupt state to all disabled */
4611 cik_disable_interrupt_state(rdev);
4612
4613 pci_set_master(rdev->pdev);
4614
4615 /* enable irqs */
4616 cik_enable_interrupts(rdev);
4617
4618 return ret;
4619}
4620
4621/**
4622 * cik_irq_set - enable/disable interrupt sources
4623 *
4624 * @rdev: radeon_device pointer
4625 *
4626 * Enable interrupt sources on the GPU (vblanks, hpd,
4627 * etc.) (CIK).
4628 * Returns 0 for success, errors for failure.
4629 */
4630int cik_irq_set(struct radeon_device *rdev)
4631{
4632 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4633 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04004634 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
4635 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05004636 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4637 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4638 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04004639 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05004640
4641 if (!rdev->irq.installed) {
4642 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4643 return -EINVAL;
4644 }
4645 /* don't enable anything if the ih is disabled */
4646 if (!rdev->ih.enabled) {
4647 cik_disable_interrupts(rdev);
4648 /* force the active interrupt state to all disabled */
4649 cik_disable_interrupt_state(rdev);
4650 return 0;
4651 }
4652
4653 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4654 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4655 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4656 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4657 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4658 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4659
Alex Deucher21a93e12013-04-09 12:47:11 -04004660 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4661 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4662
Alex Deucher2b0781a2013-04-09 14:26:16 -04004663 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4664 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4665 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4666 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4667 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4668 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4669 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4670 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
4671
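	/* All of the interrupt control registers were read above with their
	 * enable bits masked off; the requested sources are OR'd back in
	 * below and everything is written out in one pass at the end.
	 */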
Alex Deuchera59781b2012-11-09 10:45:57 -05004672 /* enable CP interrupts on all rings */
4673 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4674 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4675 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4676 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04004677 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
4678 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4679 DRM_DEBUG("cik_irq_set: sw int cp1\n");
4680 if (ring->me == 1) {
4681 switch (ring->pipe) {
4682 case 0:
4683 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
4684 break;
4685 case 1:
4686 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
4687 break;
4688 case 2:
4689 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
4690 break;
4691 case 3:
4692 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
4693 break;
4694 default:
4695 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
4696 break;
4697 }
4698 } else if (ring->me == 2) {
4699 switch (ring->pipe) {
4700 case 0:
4701 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
4702 break;
4703 case 1:
4704 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
4705 break;
4706 case 2:
4707 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
4708 break;
4709 case 3:
4710 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
4711 break;
4712 default:
4713 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
4714 break;
4715 }
4716 } else {
4717 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
4718 }
4719 }
4720 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
4721 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4722 DRM_DEBUG("cik_irq_set: sw int cp2\n");
4723 if (ring->me == 1) {
4724 switch (ring->pipe) {
4725 case 0:
4726 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
4727 break;
4728 case 1:
4729 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
4730 break;
4731 case 2:
4732 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
4733 break;
4734 case 3:
4735 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
4736 break;
4737 default:
4738 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
4739 break;
4740 }
4741 } else if (ring->me == 2) {
4742 switch (ring->pipe) {
4743 case 0:
4744 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
4745 break;
4746 case 1:
4747 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
4748 break;
4749 case 2:
4750 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
4751 break;
4752 case 3:
4753 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
4754 break;
4755 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
4756 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
4757 break;
4758 }
4759 } else {
4760 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
4761 }
4762 }
Alex Deuchera59781b2012-11-09 10:45:57 -05004763
Alex Deucher21a93e12013-04-09 12:47:11 -04004764 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4765 DRM_DEBUG("cik_irq_set: sw int dma\n");
4766 dma_cntl |= TRAP_ENABLE;
4767 }
4768
4769 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4770 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4771 dma_cntl1 |= TRAP_ENABLE;
4772 }
4773
Alex Deuchera59781b2012-11-09 10:45:57 -05004774 if (rdev->irq.crtc_vblank_int[0] ||
4775 atomic_read(&rdev->irq.pflip[0])) {
4776 DRM_DEBUG("cik_irq_set: vblank 0\n");
4777 crtc1 |= VBLANK_INTERRUPT_MASK;
4778 }
4779 if (rdev->irq.crtc_vblank_int[1] ||
4780 atomic_read(&rdev->irq.pflip[1])) {
4781 DRM_DEBUG("cik_irq_set: vblank 1\n");
4782 crtc2 |= VBLANK_INTERRUPT_MASK;
4783 }
4784 if (rdev->irq.crtc_vblank_int[2] ||
4785 atomic_read(&rdev->irq.pflip[2])) {
4786 DRM_DEBUG("cik_irq_set: vblank 2\n");
4787 crtc3 |= VBLANK_INTERRUPT_MASK;
4788 }
4789 if (rdev->irq.crtc_vblank_int[3] ||
4790 atomic_read(&rdev->irq.pflip[3])) {
4791 DRM_DEBUG("cik_irq_set: vblank 3\n");
4792 crtc4 |= VBLANK_INTERRUPT_MASK;
4793 }
4794 if (rdev->irq.crtc_vblank_int[4] ||
4795 atomic_read(&rdev->irq.pflip[4])) {
4796 DRM_DEBUG("cik_irq_set: vblank 4\n");
4797 crtc5 |= VBLANK_INTERRUPT_MASK;
4798 }
4799 if (rdev->irq.crtc_vblank_int[5] ||
4800 atomic_read(&rdev->irq.pflip[5])) {
4801 DRM_DEBUG("cik_irq_set: vblank 5\n");
4802 crtc6 |= VBLANK_INTERRUPT_MASK;
4803 }
4804 if (rdev->irq.hpd[0]) {
4805 DRM_DEBUG("cik_irq_set: hpd 1\n");
4806 hpd1 |= DC_HPDx_INT_EN;
4807 }
4808 if (rdev->irq.hpd[1]) {
4809 DRM_DEBUG("cik_irq_set: hpd 2\n");
4810 hpd2 |= DC_HPDx_INT_EN;
4811 }
4812 if (rdev->irq.hpd[2]) {
4813 DRM_DEBUG("cik_irq_set: hpd 3\n");
4814 hpd3 |= DC_HPDx_INT_EN;
4815 }
4816 if (rdev->irq.hpd[3]) {
4817 DRM_DEBUG("cik_irq_set: hpd 4\n");
4818 hpd4 |= DC_HPDx_INT_EN;
4819 }
4820 if (rdev->irq.hpd[4]) {
4821 DRM_DEBUG("cik_irq_set: hpd 5\n");
4822 hpd5 |= DC_HPDx_INT_EN;
4823 }
4824 if (rdev->irq.hpd[5]) {
4825 DRM_DEBUG("cik_irq_set: hpd 6\n");
4826 hpd6 |= DC_HPDx_INT_EN;
4827 }
4828
4829 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4830
Alex Deucher21a93e12013-04-09 12:47:11 -04004831 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4832 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4833
Alex Deucher2b0781a2013-04-09 14:26:16 -04004834 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
4835 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
4836 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
4837 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
4838 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
4839 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
4840 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
4841 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
4842
Alex Deuchera59781b2012-11-09 10:45:57 -05004843 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4844
4845 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4846 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4847 if (rdev->num_crtc >= 4) {
4848 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4849 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4850 }
4851 if (rdev->num_crtc >= 6) {
4852 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4853 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4854 }
4855
4856 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4857 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4858 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4859 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4860 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4861 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4862
4863 return 0;
4864}
4865
4866/**
4867 * cik_irq_ack - ack interrupt sources
4868 *
4869 * @rdev: radeon_device pointer
4870 *
4871 * Ack interrupt sources on the GPU (vblanks, hpd,
4872 * etc.) (CIK). Certain interrupt sources are sw
4873 * generated and do not require an explicit ack.
4874 */
4875static inline void cik_irq_ack(struct radeon_device *rdev)
4876{
4877 u32 tmp;
4878
4879 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4880 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4881 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4882 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4883 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4884 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4885 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4886
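	/* the display interrupt status registers are sampled once up front
	 * and cached in stat_regs so the ack writes below and the IH walk in
	 * cik_irq_process() see one consistent snapshot
	 */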
4887 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4888 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4889 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4890 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4891 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4892 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4893 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4894 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4895
4896 if (rdev->num_crtc >= 4) {
4897 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4898 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4899 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4900 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4901 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4902 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4903 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4904 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4905 }
4906
4907 if (rdev->num_crtc >= 6) {
4908 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4909 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4910 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4911 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4912 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4913 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4914 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4915 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4916 }
4917
4918 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4919 tmp = RREG32(DC_HPD1_INT_CONTROL);
4920 tmp |= DC_HPDx_INT_ACK;
4921 WREG32(DC_HPD1_INT_CONTROL, tmp);
4922 }
4923 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4924 tmp = RREG32(DC_HPD2_INT_CONTROL);
4925 tmp |= DC_HPDx_INT_ACK;
4926 WREG32(DC_HPD2_INT_CONTROL, tmp);
4927 }
4928 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4929 tmp = RREG32(DC_HPD3_INT_CONTROL);
4930 tmp |= DC_HPDx_INT_ACK;
4931 WREG32(DC_HPD3_INT_CONTROL, tmp);
4932 }
4933 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4934 tmp = RREG32(DC_HPD4_INT_CONTROL);
4935 tmp |= DC_HPDx_INT_ACK;
4936 WREG32(DC_HPD4_INT_CONTROL, tmp);
4937 }
4938 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4939 tmp = RREG32(DC_HPD5_INT_CONTROL);
4940 tmp |= DC_HPDx_INT_ACK;
4941 WREG32(DC_HPD5_INT_CONTROL, tmp);
4942 }
4943 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4944 tmp = RREG32(DC_HPD6_INT_CONTROL);
4945 tmp |= DC_HPDx_INT_ACK;
4946 WREG32(DC_HPD6_INT_CONTROL, tmp);
4947 }
4948}
4949
4950/**
4951 * cik_irq_disable - disable interrupts
4952 *
4953 * @rdev: radeon_device pointer
4954 *
4955 * Disable interrupts on the hw (CIK).
4956 */
4957static void cik_irq_disable(struct radeon_device *rdev)
4958{
4959 cik_disable_interrupts(rdev);
4960 /* Wait and acknowledge irq */
4961 mdelay(1);
4962 cik_irq_ack(rdev);
4963 cik_disable_interrupt_state(rdev);
4964}
4965
4966/**
4967 * cik_irq_suspend - disable interrupts for suspend
4968 *
4969 * @rdev: radeon_device pointer
4970 *
4971 * Disable interrupts and stop the RLC (CIK).
4972 * Used for suspend.
4973 */
4974static void cik_irq_suspend(struct radeon_device *rdev)
4975{
4976 cik_irq_disable(rdev);
4977 cik_rlc_stop(rdev);
4978}
4979
4980/**
4981 * cik_irq_fini - tear down interrupt support
4982 *
4983 * @rdev: radeon_device pointer
4984 *
4985 * Disable interrupts on the hw and free the IH ring
4986 * buffer (CIK).
4987 * Used for driver unload.
4988 */
4989static void cik_irq_fini(struct radeon_device *rdev)
4990{
4991 cik_irq_suspend(rdev);
4992 r600_ih_ring_fini(rdev);
4993}
4994
4995/**
4996 * cik_get_ih_wptr - get the IH ring buffer wptr
4997 *
4998 * @rdev: radeon_device pointer
4999 *
5000 * Get the IH ring buffer wptr from either the register
5001 * or the writeback memory buffer (CIK). Also check for
5002 * ring buffer overflow and deal with it.
5003 * Used by cik_irq_process().
5004 * Returns the value of the wptr.
5005 */
5006static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5007{
5008 u32 wptr, tmp;
5009
5010 if (rdev->wb.enabled)
5011 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5012 else
5013 wptr = RREG32(IH_RB_WPTR);
5014
5015 if (wptr & RB_OVERFLOW) {
5016 /* When a ring buffer overflow happens, start parsing interrupts
5017 * from the last not-overwritten vector (wptr + 16). Hopefully
5018 * this should allow us to catch up.
5019 */
5020 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5021 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5022 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5023 tmp = RREG32(IH_RB_CNTL);
5024 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5025 WREG32(IH_RB_CNTL, tmp);
5026 }
5027 return (wptr & rdev->ih.ptr_mask);
5028}
5029
5030/* CIK IV Ring
5031 * Each IV ring entry is 128 bits:
5032 * [7:0] - interrupt source id
5033 * [31:8] - reserved
5034 * [59:32] - interrupt source data
5035 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005036 * [71:64] - RINGID
5037 * CP:
5038 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005039 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5040 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5041 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5042 * PIPE_ID - ME0 0=3D
5043 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005044 * SDMA:
5045 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5046 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5047 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005048 * [79:72] - VMID
5049 * [95:80] - PASID
5050 * [127:96] - reserved
5051 */
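/*
 * A decoded entry therefore maps onto the fields used below roughly as
 * (dw0..dw3 being the four little-endian dwords of one 16 byte vector):
 *
 *	src_id   = dw0 & 0xff;
 *	src_data = dw1 & 0x0fffffff;
 *	ring_id  = dw2 & 0xff;		// ME_ID/PIPE_ID/QUEUE_ID or SDMA ids
 *	vm_id    = (dw2 >> 8) & 0xff;
 */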
5052/**
5053 * cik_irq_process - interrupt handler
5054 *
5055 * @rdev: radeon_device pointer
5056 *
5057 * Interrupt handler (CIK). Walk the IH ring,
5058 * ack interrupts and schedule work to handle
5059 * interrupt events.
5060 * Returns irq process return code.
5061 */
5062int cik_irq_process(struct radeon_device *rdev)
5063{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005064 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5065 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005066 u32 wptr;
5067 u32 rptr;
5068 u32 src_id, src_data, ring_id;
5069 u8 me_id, pipe_id, queue_id;
5070 u32 ring_index;
5071 bool queue_hotplug = false;
5072 bool queue_reset = false;
5073
5074 if (!rdev->ih.enabled || rdev->shutdown)
5075 return IRQ_NONE;
5076
5077 wptr = cik_get_ih_wptr(rdev);
5078
5079restart_ih:
5080 /* is somebody else already processing irqs? */
5081 if (atomic_xchg(&rdev->ih.lock, 1))
5082 return IRQ_NONE;
5083
5084 rptr = rdev->ih.rptr;
5085 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5086
5087 /* Order reading of wptr vs. reading of IH ring data */
5088 rmb();
5089
5090 /* display interrupts */
5091 cik_irq_ack(rdev);
5092
5093 while (rptr != wptr) {
5094 /* wptr/rptr are in bytes! */
5095 ring_index = rptr / 4;
5096 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5097 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5098 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005099
5100 switch (src_id) {
5101 case 1: /* D1 vblank/vline */
5102 switch (src_data) {
5103 case 0: /* D1 vblank */
5104 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5105 if (rdev->irq.crtc_vblank_int[0]) {
5106 drm_handle_vblank(rdev->ddev, 0);
5107 rdev->pm.vblank_sync = true;
5108 wake_up(&rdev->irq.vblank_queue);
5109 }
5110 if (atomic_read(&rdev->irq.pflip[0]))
5111 radeon_crtc_handle_flip(rdev, 0);
5112 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5113 DRM_DEBUG("IH: D1 vblank\n");
5114 }
5115 break;
5116 case 1: /* D1 vline */
5117 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5118 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5119 DRM_DEBUG("IH: D1 vline\n");
5120 }
5121 break;
5122 default:
5123 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5124 break;
5125 }
5126 break;
5127 case 2: /* D2 vblank/vline */
5128 switch (src_data) {
5129 case 0: /* D2 vblank */
5130 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5131 if (rdev->irq.crtc_vblank_int[1]) {
5132 drm_handle_vblank(rdev->ddev, 1);
5133 rdev->pm.vblank_sync = true;
5134 wake_up(&rdev->irq.vblank_queue);
5135 }
5136 if (atomic_read(&rdev->irq.pflip[1]))
5137 radeon_crtc_handle_flip(rdev, 1);
5138 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5139 DRM_DEBUG("IH: D2 vblank\n");
5140 }
5141 break;
5142 case 1: /* D2 vline */
5143 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5144 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5145 DRM_DEBUG("IH: D2 vline\n");
5146 }
5147 break;
5148 default:
5149 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5150 break;
5151 }
5152 break;
5153 case 3: /* D3 vblank/vline */
5154 switch (src_data) {
5155 case 0: /* D3 vblank */
5156 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5157 if (rdev->irq.crtc_vblank_int[2]) {
5158 drm_handle_vblank(rdev->ddev, 2);
5159 rdev->pm.vblank_sync = true;
5160 wake_up(&rdev->irq.vblank_queue);
5161 }
5162 if (atomic_read(&rdev->irq.pflip[2]))
5163 radeon_crtc_handle_flip(rdev, 2);
5164 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5165 DRM_DEBUG("IH: D3 vblank\n");
5166 }
5167 break;
5168 case 1: /* D3 vline */
5169 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5170 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5171 DRM_DEBUG("IH: D3 vline\n");
5172 }
5173 break;
5174 default:
5175 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5176 break;
5177 }
5178 break;
5179 case 4: /* D4 vblank/vline */
5180 switch (src_data) {
5181 case 0: /* D4 vblank */
5182 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5183 if (rdev->irq.crtc_vblank_int[3]) {
5184 drm_handle_vblank(rdev->ddev, 3);
5185 rdev->pm.vblank_sync = true;
5186 wake_up(&rdev->irq.vblank_queue);
5187 }
5188 if (atomic_read(&rdev->irq.pflip[3]))
5189 radeon_crtc_handle_flip(rdev, 3);
5190 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5191 DRM_DEBUG("IH: D4 vblank\n");
5192 }
5193 break;
5194 case 1: /* D4 vline */
5195 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5196 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5197 DRM_DEBUG("IH: D4 vline\n");
5198 }
5199 break;
5200 default:
5201 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5202 break;
5203 }
5204 break;
5205 case 5: /* D5 vblank/vline */
5206 switch (src_data) {
5207 case 0: /* D5 vblank */
5208 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5209 if (rdev->irq.crtc_vblank_int[4]) {
5210 drm_handle_vblank(rdev->ddev, 4);
5211 rdev->pm.vblank_sync = true;
5212 wake_up(&rdev->irq.vblank_queue);
5213 }
5214 if (atomic_read(&rdev->irq.pflip[4]))
5215 radeon_crtc_handle_flip(rdev, 4);
5216 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5217 DRM_DEBUG("IH: D5 vblank\n");
5218 }
5219 break;
5220 case 1: /* D5 vline */
5221 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5222 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5223 DRM_DEBUG("IH: D5 vline\n");
5224 }
5225 break;
5226 default:
5227 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5228 break;
5229 }
5230 break;
5231 case 6: /* D6 vblank/vline */
5232 switch (src_data) {
5233 case 0: /* D6 vblank */
5234 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5235 if (rdev->irq.crtc_vblank_int[5]) {
5236 drm_handle_vblank(rdev->ddev, 5);
5237 rdev->pm.vblank_sync = true;
5238 wake_up(&rdev->irq.vblank_queue);
5239 }
5240 if (atomic_read(&rdev->irq.pflip[5]))
5241 radeon_crtc_handle_flip(rdev, 5);
5242 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5243 DRM_DEBUG("IH: D6 vblank\n");
5244 }
5245 break;
5246 case 1: /* D6 vline */
5247 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5248 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5249 DRM_DEBUG("IH: D6 vline\n");
5250 }
5251 break;
5252 default:
5253 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5254 break;
5255 }
5256 break;
5257 case 42: /* HPD hotplug */
5258 switch (src_data) {
5259 case 0:
5260 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5261 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5262 queue_hotplug = true;
5263 DRM_DEBUG("IH: HPD1\n");
5264 }
5265 break;
5266 case 1:
5267 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5268 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5269 queue_hotplug = true;
5270 DRM_DEBUG("IH: HPD2\n");
5271 }
5272 break;
5273 case 2:
5274 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5275 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5276 queue_hotplug = true;
5277 DRM_DEBUG("IH: HPD3\n");
5278 }
5279 break;
5280 case 3:
5281 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5282 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5283 queue_hotplug = true;
5284 DRM_DEBUG("IH: HPD4\n");
5285 }
5286 break;
5287 case 4:
5288 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5289 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5290 queue_hotplug = true;
5291 DRM_DEBUG("IH: HPD5\n");
5292 }
5293 break;
5294 case 5:
5295 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5296 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5297 queue_hotplug = true;
5298 DRM_DEBUG("IH: HPD6\n");
5299 }
5300 break;
5301 default:
5302 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5303 break;
5304 }
5305 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005306 case 146:
5307 case 147:
5308 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5309 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5310 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5311 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5312 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5313 /* reset addr and status */
5314 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5315 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005316 case 176: /* GFX RB CP_INT */
5317 case 177: /* GFX IB CP_INT */
5318 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5319 break;
5320 case 181: /* CP EOP event */
5321 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005322 /* XXX check the bitfield order! */
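			/* ring_id decode used below for CP sources (see the IV ring
			 * description above): ME_ID = bits [7:5], PIPE_ID = bits [4:3],
			 * QUEUE_ID = bits [2:0]
			 */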
5323 me_id = (ring_id & 0x60) >> 5;
5324 pipe_id = (ring_id & 0x18) >> 3;
5325 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005326 switch (me_id) {
5327 case 0:
5328 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5329 break;
5330 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005331 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005332				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5333					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5334				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5335 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005336 break;
5337 }
5338 break;
5339 case 184: /* CP Privileged reg access */
5340 DRM_ERROR("Illegal register access in command stream\n");
5341 /* XXX check the bitfield order! */
5342 me_id = (ring_id & 0x60) >> 5;
5343 pipe_id = (ring_id & 0x18) >> 3;
5344 queue_id = (ring_id & 0x7) >> 0;
5345 switch (me_id) {
5346 case 0:
5347 /* This results in a full GPU reset, but all we need to do is soft
5348 * reset the CP for gfx
5349 */
5350 queue_reset = true;
5351 break;
5352 case 1:
5353 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005354 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005355 break;
5356 case 2:
5357 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005358 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005359 break;
5360 }
5361 break;
5362 case 185: /* CP Privileged inst */
5363 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005364 /* XXX check the bitfield order! */
5365 me_id = (ring_id & 0x60) >> 5;
5366 pipe_id = (ring_id & 0x18) >> 3;
5367 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005368 switch (me_id) {
5369 case 0:
5370 /* This results in a full GPU reset, but all we need to do is soft
5371 * reset the CP for gfx
5372 */
5373 queue_reset = true;
5374 break;
5375 case 1:
5376 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005377 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005378 break;
5379 case 2:
5380 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005381 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005382 break;
5383 }
5384 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005385 case 224: /* SDMA trap event */
5386 /* XXX check the bitfield order! */
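			/* ring_id decode used below for SDMA sources:
			 * INSTANCE_ID = bits [1:0], QUEUE_ID = bits [3:2]
			 */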
5387 me_id = (ring_id & 0x3) >> 0;
5388 queue_id = (ring_id & 0xc) >> 2;
5389 DRM_DEBUG("IH: SDMA trap\n");
5390 switch (me_id) {
5391 case 0:
5392 switch (queue_id) {
5393 case 0:
5394 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5395 break;
5396 case 1:
5397 /* XXX compute */
5398 break;
5399 case 2:
5400 /* XXX compute */
5401 break;
5402 }
5403 break;
5404 case 1:
5405 switch (queue_id) {
5406 case 0:
5407 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5408 break;
5409 case 1:
5410 /* XXX compute */
5411 break;
5412 case 2:
5413 /* XXX compute */
5414 break;
5415 }
5416 break;
5417 }
5418 break;
5419 case 241: /* SDMA Privileged inst */
5420 case 247: /* SDMA Privileged inst */
5421 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5422 /* XXX check the bitfield order! */
5423 me_id = (ring_id & 0x3) >> 0;
5424 queue_id = (ring_id & 0xc) >> 2;
5425 switch (me_id) {
5426 case 0:
5427 switch (queue_id) {
5428 case 0:
5429 queue_reset = true;
5430 break;
5431 case 1:
5432 /* XXX compute */
5433 queue_reset = true;
5434 break;
5435 case 2:
5436 /* XXX compute */
5437 queue_reset = true;
5438 break;
5439 }
5440 break;
5441 case 1:
5442 switch (queue_id) {
5443 case 0:
5444 queue_reset = true;
5445 break;
5446 case 1:
5447 /* XXX compute */
5448 queue_reset = true;
5449 break;
5450 case 2:
5451 /* XXX compute */
5452 queue_reset = true;
5453 break;
5454 }
5455 break;
5456 }
5457 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005458 case 233: /* GUI IDLE */
5459 DRM_DEBUG("IH: GUI idle\n");
5460 break;
5461 default:
5462 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5463 break;
5464 }
5465
5466 /* wptr/rptr are in bytes! */
5467 rptr += 16;
5468 rptr &= rdev->ih.ptr_mask;
5469 }
5470 if (queue_hotplug)
5471 schedule_work(&rdev->hotplug_work);
5472 if (queue_reset)
5473 schedule_work(&rdev->reset_work);
5474 rdev->ih.rptr = rptr;
5475 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5476 atomic_set(&rdev->ih.lock, 0);
5477
5478 /* make sure wptr hasn't changed while processing */
5479 wptr = cik_get_ih_wptr(rdev);
5480 if (wptr != rptr)
5481 goto restart_ih;
5482
5483 return IRQ_HANDLED;
5484}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005485
5486/*
5487 * startup/shutdown callbacks
5488 */
5489/**
5490 * cik_startup - program the asic to a functional state
5491 *
5492 * @rdev: radeon_device pointer
5493 *
5494 * Programs the asic to a functional state (CIK).
5495 * Called by cik_init() and cik_resume().
5496 * Returns 0 for success, error for failure.
5497 */
5498static int cik_startup(struct radeon_device *rdev)
5499{
5500 struct radeon_ring *ring;
5501 int r;
5502
5503 if (rdev->flags & RADEON_IS_IGP) {
5504 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5505 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5506 r = cik_init_microcode(rdev);
5507 if (r) {
5508 DRM_ERROR("Failed to load firmware!\n");
5509 return r;
5510 }
5511 }
5512 } else {
5513 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5514 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5515 !rdev->mc_fw) {
5516 r = cik_init_microcode(rdev);
5517 if (r) {
5518 DRM_ERROR("Failed to load firmware!\n");
5519 return r;
5520 }
5521 }
5522
5523 r = ci_mc_load_microcode(rdev);
5524 if (r) {
5525 DRM_ERROR("Failed to load MC firmware!\n");
5526 return r;
5527 }
5528 }
5529
5530 r = r600_vram_scratch_init(rdev);
5531 if (r)
5532 return r;
5533
5534 cik_mc_program(rdev);
5535 r = cik_pcie_gart_enable(rdev);
5536 if (r)
5537 return r;
5538 cik_gpu_init(rdev);
5539
5540 /* allocate rlc buffers */
5541 r = si_rlc_init(rdev);
5542 if (r) {
5543 DRM_ERROR("Failed to init rlc BOs!\n");
5544 return r;
5545 }
5546
5547 /* allocate wb buffer */
5548 r = radeon_wb_init(rdev);
5549 if (r)
5550 return r;
5551
Alex Deucher963e81f2013-06-26 17:37:11 -04005552 /* allocate mec buffers */
5553 r = cik_mec_init(rdev);
5554 if (r) {
5555 DRM_ERROR("Failed to init MEC BOs!\n");
5556 return r;
5557 }
5558
Alex Deucher7bf94a22012-08-17 11:48:29 -04005559 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5560 if (r) {
5561 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5562 return r;
5563 }
5564
Alex Deucher963e81f2013-06-26 17:37:11 -04005565 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5566 if (r) {
5567 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5568 return r;
5569 }
5570
5571 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5572 if (r) {
5573 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5574 return r;
5575 }
5576
Alex Deucher7bf94a22012-08-17 11:48:29 -04005577 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5578 if (r) {
5579 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5580 return r;
5581 }
5582
5583 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5584 if (r) {
5585 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5586 return r;
5587 }
5588
Christian König87167bb2013-04-09 13:39:21 -04005589 r = cik_uvd_resume(rdev);
5590 if (!r) {
5591 r = radeon_fence_driver_start_ring(rdev,
5592 R600_RING_TYPE_UVD_INDEX);
5593 if (r)
5594 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5595 }
5596 if (r)
5597 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5598
Alex Deucher7bf94a22012-08-17 11:48:29 -04005599 /* Enable IRQ */
5600 if (!rdev->irq.installed) {
5601 r = radeon_irq_kms_init(rdev);
5602 if (r)
5603 return r;
5604 }
5605
5606 r = cik_irq_init(rdev);
5607 if (r) {
5608 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5609 radeon_irq_kms_fini(rdev);
5610 return r;
5611 }
5612 cik_irq_set(rdev);
5613
5614 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5615 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5616 CP_RB0_RPTR, CP_RB0_WPTR,
5617 0, 0xfffff, RADEON_CP_PACKET2);
5618 if (r)
5619 return r;
5620
Alex Deucher963e81f2013-06-26 17:37:11 -04005621 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04005622 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04005623 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5624 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5625 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04005626 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04005627 if (r)
5628 return r;
5629 ring->me = 1; /* first MEC */
5630 ring->pipe = 0; /* first pipe */
5631 ring->queue = 0; /* first queue */
5632 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
5633
Alex Deucher2615b532013-06-03 11:21:58 -04005634 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04005635 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5636 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5637 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04005638 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04005639 if (r)
5640 return r;
5641	/* dGPUs only have 1 MEC */
5642 ring->me = 1; /* first MEC */
5643 ring->pipe = 0; /* first pipe */
5644 ring->queue = 1; /* second queue */
5645 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
5646
Alex Deucher7bf94a22012-08-17 11:48:29 -04005647 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5648 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5649 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
5650 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
5651 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5652 if (r)
5653 return r;
5654
5655 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5656 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5657 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
5658 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
5659 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5660 if (r)
5661 return r;
5662
5663 r = cik_cp_resume(rdev);
5664 if (r)
5665 return r;
5666
5667 r = cik_sdma_resume(rdev);
5668 if (r)
5669 return r;
5670
Christian König87167bb2013-04-09 13:39:21 -04005671 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5672 if (ring->ring_size) {
5673 r = radeon_ring_init(rdev, ring, ring->ring_size,
5674 R600_WB_UVD_RPTR_OFFSET,
5675 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5676 0, 0xfffff, RADEON_CP_PACKET2);
5677 if (!r)
5678 r = r600_uvd_init(rdev);
5679 if (r)
5680 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5681 }
5682
Alex Deucher7bf94a22012-08-17 11:48:29 -04005683 r = radeon_ib_pool_init(rdev);
5684 if (r) {
5685 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5686 return r;
5687 }
5688
5689 r = radeon_vm_manager_init(rdev);
5690 if (r) {
5691 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5692 return r;
5693 }
5694
5695 return 0;
5696}
5697
5698/**
5699 * cik_resume - resume the asic to a functional state
5700 *
5701 * @rdev: radeon_device pointer
5702 *
5703 * Programs the asic to a functional state (CIK).
5704 * Called at resume.
5705 * Returns 0 for success, error for failure.
5706 */
5707int cik_resume(struct radeon_device *rdev)
5708{
5709 int r;
5710
5711 /* post card */
5712 atom_asic_init(rdev->mode_info.atom_context);
5713
5714 rdev->accel_working = true;
5715 r = cik_startup(rdev);
5716 if (r) {
5717 DRM_ERROR("cik startup failed on resume\n");
5718 rdev->accel_working = false;
5719 return r;
5720 }
5721
5722 return r;
5723
5724}
5725
5726/**
5727 * cik_suspend - suspend the asic
5728 *
5729 * @rdev: radeon_device pointer
5730 *
5731 * Bring the chip into a state suitable for suspend (CIK).
5732 * Called at suspend.
5733 * Returns 0 for success.
5734 */
5735int cik_suspend(struct radeon_device *rdev)
5736{
5737 radeon_vm_manager_fini(rdev);
5738 cik_cp_enable(rdev, false);
5739 cik_sdma_enable(rdev, false);
Christian König87167bb2013-04-09 13:39:21 -04005740 r600_uvd_rbc_stop(rdev);
5741 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005742 cik_irq_suspend(rdev);
5743 radeon_wb_disable(rdev);
5744 cik_pcie_gart_disable(rdev);
5745 return 0;
5746}
5747
5748 /* The plan is to move initialization into this function and use
5749  * helper functions so that radeon_device_init does little more
5750  * than call asic specific functions. This should also
5751  * allow us to remove a bunch of callback functions
5752  * like vram_info.
5753  */
5754/**
5755 * cik_init - asic specific driver and hw init
5756 *
5757 * @rdev: radeon_device pointer
5758 *
5759 * Setup asic specific driver variables and program the hw
5760 * to a functional state (CIK).
5761 * Called at driver startup.
5762 * Returns 0 for success, errors for failure.
5763 */
5764int cik_init(struct radeon_device *rdev)
5765{
5766 struct radeon_ring *ring;
5767 int r;
5768
5769 /* Read BIOS */
5770 if (!radeon_get_bios(rdev)) {
5771 if (ASIC_IS_AVIVO(rdev))
5772 return -EINVAL;
5773 }
5774 /* Must be an ATOMBIOS */
5775 if (!rdev->is_atom_bios) {
5776		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5777 return -EINVAL;
5778 }
5779 r = radeon_atombios_init(rdev);
5780 if (r)
5781 return r;
5782
5783 /* Post card if necessary */
5784 if (!radeon_card_posted(rdev)) {
5785 if (!rdev->bios) {
5786 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5787 return -EINVAL;
5788 }
5789 DRM_INFO("GPU not posted. posting now...\n");
5790 atom_asic_init(rdev->mode_info.atom_context);
5791 }
5792 /* Initialize scratch registers */
5793 cik_scratch_init(rdev);
5794 /* Initialize surface registers */
5795 radeon_surface_init(rdev);
5796 /* Initialize clocks */
5797 radeon_get_clock_info(rdev->ddev);
5798
5799 /* Fence driver */
5800 r = radeon_fence_driver_init(rdev);
5801 if (r)
5802 return r;
5803
5804 /* initialize memory controller */
5805 r = cik_mc_init(rdev);
5806 if (r)
5807 return r;
5808 /* Memory manager */
5809 r = radeon_bo_init(rdev);
5810 if (r)
5811 return r;
5812
5813 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5814 ring->ring_obj = NULL;
5815 r600_ring_init(rdev, ring, 1024 * 1024);
5816
Alex Deucher963e81f2013-06-26 17:37:11 -04005817 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5818 ring->ring_obj = NULL;
5819 r600_ring_init(rdev, ring, 1024 * 1024);
5820 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5821 if (r)
5822 return r;
5823
5824 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5825 ring->ring_obj = NULL;
5826 r600_ring_init(rdev, ring, 1024 * 1024);
5827 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5828 if (r)
5829 return r;
5830
Alex Deucher7bf94a22012-08-17 11:48:29 -04005831 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5832 ring->ring_obj = NULL;
5833 r600_ring_init(rdev, ring, 256 * 1024);
5834
5835 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5836 ring->ring_obj = NULL;
5837 r600_ring_init(rdev, ring, 256 * 1024);
5838
Christian König87167bb2013-04-09 13:39:21 -04005839 r = radeon_uvd_init(rdev);
5840 if (!r) {
5841 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5842 ring->ring_obj = NULL;
5843 r600_ring_init(rdev, ring, 4096);
5844 }
5845
Alex Deucher7bf94a22012-08-17 11:48:29 -04005846 rdev->ih.ring_obj = NULL;
5847 r600_ih_ring_init(rdev, 64 * 1024);
5848
5849 r = r600_pcie_gart_init(rdev);
5850 if (r)
5851 return r;
5852
5853 rdev->accel_working = true;
5854 r = cik_startup(rdev);
5855 if (r) {
5856 dev_err(rdev->dev, "disabling GPU acceleration\n");
5857 cik_cp_fini(rdev);
5858 cik_sdma_fini(rdev);
5859 cik_irq_fini(rdev);
5860 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04005861 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005862 radeon_wb_fini(rdev);
5863 radeon_ib_pool_fini(rdev);
5864 radeon_vm_manager_fini(rdev);
5865 radeon_irq_kms_fini(rdev);
5866 cik_pcie_gart_fini(rdev);
5867 rdev->accel_working = false;
5868 }
5869
5870 /* Don't start up if the MC ucode is missing.
5871 * The default clocks and voltages before the MC ucode
5872  * is loaded are not sufficient for advanced operations.
5873 */
5874 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5875 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5876 return -EINVAL;
5877 }
5878
5879 return 0;
5880}
5881
5882/**
5883 * cik_fini - asic specific driver and hw fini
5884 *
5885 * @rdev: radeon_device pointer
5886 *
5887 * Tear down the asic specific driver variables and program the hw
5888 * to an idle state (CIK).
5889 * Called at driver unload.
5890 */
5891void cik_fini(struct radeon_device *rdev)
5892{
5893 cik_cp_fini(rdev);
5894 cik_sdma_fini(rdev);
5895 cik_irq_fini(rdev);
5896 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04005897 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005898 radeon_wb_fini(rdev);
5899 radeon_vm_manager_fini(rdev);
5900 radeon_ib_pool_fini(rdev);
5901 radeon_irq_kms_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04005902 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005903 cik_pcie_gart_fini(rdev);
5904 r600_vram_scratch_fini(rdev);
5905 radeon_gem_fini(rdev);
5906 radeon_fence_driver_fini(rdev);
5907 radeon_bo_fini(rdev);
5908 radeon_atombios_fini(rdev);
5909 kfree(rdev->bios);
5910 rdev->bios = NULL;
5911}
Alex Deuchercd84a272012-07-20 17:13:13 -04005912
5913/* display watermark setup */
5914/**
5915 * dce8_line_buffer_adjust - Set up the line buffer
5916 *
5917 * @rdev: radeon_device pointer
5918 * @radeon_crtc: the selected display controller
5919 * @mode: the current display mode on the selected display
5920 * controller
5921 *
5922 * Set up the line buffer allocation for
5923 * the selected display controller (CIK).
5924 * Returns the line buffer size in pixels.
5925 */
5926static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5927 struct radeon_crtc *radeon_crtc,
5928 struct drm_display_mode *mode)
5929{
5930 u32 tmp;
5931
5932 /*
5933 * Line Buffer Setup
5934	 * There are 6 line buffers, one for each display controller.
5935	 * There are 3 partitions per LB. Select the number of partitions
5936	 * to enable based on the display width. For display widths larger
5937	 * than 4096, you need to use 2 display controllers and combine
5938 * them using the stereo blender.
5939 */
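	/* The LB_MEMORY_CONFIG value chosen below (tmp) sets how much of the
	 * line buffer this pipe gets: config 1 covers modes up to 1920 pixels
	 * wide, config 2 up to 2560, and config 0 up to 4096, matching the
	 * sizes returned at the end of this function.
	 */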
5940 if (radeon_crtc->base.enabled && mode) {
5941 if (mode->crtc_hdisplay < 1920)
5942 tmp = 1;
5943 else if (mode->crtc_hdisplay < 2560)
5944 tmp = 2;
5945 else if (mode->crtc_hdisplay < 4096)
5946 tmp = 0;
5947 else {
5948 DRM_DEBUG_KMS("Mode too big for LB!\n");
5949 tmp = 0;
5950 }
5951 } else
5952 tmp = 1;
5953
5954 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5955 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5956
5957 if (radeon_crtc->base.enabled && mode) {
5958 switch (tmp) {
5959 case 0:
5960 default:
5961 return 4096 * 2;
5962 case 1:
5963 return 1920 * 2;
5964 case 2:
5965 return 2560 * 2;
5966 }
5967 }
5968
5969 /* controller not enabled, so no lb used */
5970 return 0;
5971}
5972
5973/**
5974 * cik_get_number_of_dram_channels - get the number of dram channels
5975 *
5976 * @rdev: radeon_device pointer
5977 *
5978 * Look up the number of video ram channels (CIK).
5979 * Used for display watermark bandwidth calculations
5980 * Returns the number of dram channels
5981 */
5982static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5983{
5984 u32 tmp = RREG32(MC_SHARED_CHMAP);
5985
5986 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5987 case 0:
5988 default:
5989 return 1;
5990 case 1:
5991 return 2;
5992 case 2:
5993 return 4;
5994 case 3:
5995 return 8;
5996 case 4:
5997 return 3;
5998 case 5:
5999 return 6;
6000 case 6:
6001 return 10;
6002 case 7:
6003 return 12;
6004 case 8:
6005 return 16;
6006 }
6007}
6008
6009struct dce8_wm_params {
6010 u32 dram_channels; /* number of dram channels */
6011 u32 yclk; /* bandwidth per dram data pin in kHz */
6012 u32 sclk; /* engine clock in kHz */
6013 u32 disp_clk; /* display clock in kHz */
6014 u32 src_width; /* viewport width */
6015 u32 active_time; /* active display time in ns */
6016 u32 blank_time; /* blank time in ns */
6017 bool interlaced; /* mode is interlaced */
6018 fixed20_12 vsc; /* vertical scale ratio */
6019 u32 num_heads; /* number of active crtcs */
6020 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6021 u32 lb_size; /* line buffer allocated to pipe */
6022 u32 vtaps; /* vertical scaler taps */
6023};
6024
6025/**
6026 * dce8_dram_bandwidth - get the dram bandwidth
6027 *
6028 * @wm: watermark calculation data
6029 *
6030 * Calculate the raw dram bandwidth (CIK).
6031 * Used for display watermark bandwidth calculations
6032 * Returns the dram bandwidth in MBytes/s
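 *
 * Worked example (illustrative numbers): with yclk = 1 GHz (1000000 kHz)
 * and 4 dram channels, the formula below evaluates to roughly
 * 1000 * (4 * 4) * 0.7 = 11200 MBytes/s.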
6033 */
6034static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6035{
6036 /* Calculate raw DRAM Bandwidth */
6037 fixed20_12 dram_efficiency; /* 0.7 */
6038 fixed20_12 yclk, dram_channels, bandwidth;
6039 fixed20_12 a;
6040
6041 a.full = dfixed_const(1000);
6042 yclk.full = dfixed_const(wm->yclk);
6043 yclk.full = dfixed_div(yclk, a);
6044 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6045 a.full = dfixed_const(10);
6046 dram_efficiency.full = dfixed_const(7);
6047 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6048 bandwidth.full = dfixed_mul(dram_channels, yclk);
6049 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6050
6051 return dfixed_trunc(bandwidth);
6052}
6053
6054/**
6055 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6056 *
6057 * @wm: watermark calculation data
6058 *
6059 * Calculate the dram bandwidth used for display (CIK).
6060 * Used for display watermark bandwidth calculations
6061 * Returns the dram bandwidth for display in MBytes/s
6062 */
6063static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6064{
6065 /* Calculate DRAM Bandwidth and the part allocated to display. */
6066 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6067 fixed20_12 yclk, dram_channels, bandwidth;
6068 fixed20_12 a;
6069
6070 a.full = dfixed_const(1000);
6071 yclk.full = dfixed_const(wm->yclk);
6072 yclk.full = dfixed_div(yclk, a);
6073 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6074 a.full = dfixed_const(10);
6075	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6076 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6077 bandwidth.full = dfixed_mul(dram_channels, yclk);
6078 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6079
6080 return dfixed_trunc(bandwidth);
6081}
6082
6083/**
6084 * dce8_data_return_bandwidth - get the data return bandwidth
6085 *
6086 * @wm: watermark calculation data
6087 *
6088 * Calculate the data return bandwidth used for display (CIK).
6089 * Used for display watermark bandwidth calculations
6090 * Returns the data return bandwidth in MBytes/s
6091 */
6092static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6093{
6094 /* Calculate the display Data return Bandwidth */
6095 fixed20_12 return_efficiency; /* 0.8 */
6096 fixed20_12 sclk, bandwidth;
6097 fixed20_12 a;
6098
6099 a.full = dfixed_const(1000);
6100 sclk.full = dfixed_const(wm->sclk);
6101 sclk.full = dfixed_div(sclk, a);
6102 a.full = dfixed_const(10);
6103 return_efficiency.full = dfixed_const(8);
6104 return_efficiency.full = dfixed_div(return_efficiency, a);
6105 a.full = dfixed_const(32);
6106 bandwidth.full = dfixed_mul(a, sclk);
6107 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6108
6109 return dfixed_trunc(bandwidth);
6110}
6111
6112/**
6113 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6114 *
6115 * @wm: watermark calculation data
6116 *
6117 * Calculate the dmif bandwidth used for display (CIK).
6118 * Used for display watermark bandwidth calculations
6119 * Returns the dmif bandwidth in MBytes/s
6120 */
6121static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6122{
6123 /* Calculate the DMIF Request Bandwidth */
6124 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6125 fixed20_12 disp_clk, bandwidth;
6126 fixed20_12 a, b;
6127
6128 a.full = dfixed_const(1000);
6129 disp_clk.full = dfixed_const(wm->disp_clk);
6130 disp_clk.full = dfixed_div(disp_clk, a);
6131 a.full = dfixed_const(32);
6132 b.full = dfixed_mul(a, disp_clk);
6133
6134 a.full = dfixed_const(10);
6135 disp_clk_request_efficiency.full = dfixed_const(8);
6136 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6137
6138 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6139
6140 return dfixed_trunc(bandwidth);
6141}
6142
6143/**
6144 * dce8_available_bandwidth - get the min available bandwidth
6145 *
6146 * @wm: watermark calculation data
6147 *
6148 * Calculate the min available bandwidth used for display (CIK).
6149 * Used for display watermark bandwidth calculations
6150 * Returns the min available bandwidth in MBytes/s
6151 */
6152static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6153{
6154	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6155 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6156 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6157 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6158
6159 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6160}
6161
6162/**
6163 * dce8_average_bandwidth - get the average available bandwidth
6164 *
6165 * @wm: watermark calculation data
6166 *
6167 * Calculate the average available bandwidth used for display (CIK).
6168 * Used for display watermark bandwidth calculations
6169 * Returns the average available bandwidth in MBytes/s
6170 */
6171static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6172{
6173 /* Calculate the display mode Average Bandwidth
6174 * DisplayMode should contain the source and destination dimensions,
6175 * timing, etc.
6176 */
6177 fixed20_12 bpp;
6178 fixed20_12 line_time;
6179 fixed20_12 src_width;
6180 fixed20_12 bandwidth;
6181 fixed20_12 a;
6182
6183 a.full = dfixed_const(1000);
6184 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6185 line_time.full = dfixed_div(line_time, a);
6186 bpp.full = dfixed_const(wm->bytes_per_pixel);
6187 src_width.full = dfixed_const(wm->src_width);
6188 bandwidth.full = dfixed_mul(src_width, bpp);
6189 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6190 bandwidth.full = dfixed_div(bandwidth, line_time);
6191
6192 return dfixed_trunc(bandwidth);
6193}
6194
6195/**
6196 * dce8_latency_watermark - get the latency watermark
6197 *
6198 * @wm: watermark calculation data
6199 *
6200 * Calculate the latency watermark (CIK).
6201 * Used for display watermark bandwidth calculations
6202 * Returns the latency watermark in ns
6203 */
6204static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6205{
6206 /* First calculate the latency in ns */
6207 u32 mc_latency = 2000; /* 2000 ns. */
6208 u32 available_bandwidth = dce8_available_bandwidth(wm);
6209 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6210 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6211 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6212 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6213 (wm->num_heads * cursor_line_pair_return_time);
6214 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6215 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6216 u32 tmp, dmif_size = 12288;
6217 fixed20_12 a, b, c;
6218
6219 if (wm->num_heads == 0)
6220 return 0;
6221
6222 a.full = dfixed_const(2);
6223 b.full = dfixed_const(1);
6224 if ((wm->vsc.full > a.full) ||
6225 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6226 (wm->vtaps >= 5) ||
6227 ((wm->vsc.full >= a.full) && wm->interlaced))
6228 max_src_lines_per_dst_line = 4;
6229 else
6230 max_src_lines_per_dst_line = 2;
6231
6232 a.full = dfixed_const(available_bandwidth);
6233 b.full = dfixed_const(wm->num_heads);
6234 a.full = dfixed_div(a, b);
6235
6236 b.full = dfixed_const(mc_latency + 512);
6237 c.full = dfixed_const(wm->disp_clk);
6238 b.full = dfixed_div(b, c);
6239
6240 c.full = dfixed_const(dmif_size);
6241 b.full = dfixed_div(c, b);
6242
6243 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6244
6245 b.full = dfixed_const(1000);
6246 c.full = dfixed_const(wm->disp_clk);
6247 b.full = dfixed_div(c, b);
6248 c.full = dfixed_const(wm->bytes_per_pixel);
6249 b.full = dfixed_mul(b, c);
6250
6251 lb_fill_bw = min(tmp, dfixed_trunc(b));
6252
6253 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6254 b.full = dfixed_const(1000);
6255 c.full = dfixed_const(lb_fill_bw);
6256 b.full = dfixed_div(c, b);
6257 a.full = dfixed_div(a, b);
6258 line_fill_time = dfixed_trunc(a);
6259
6260 if (line_fill_time < wm->active_time)
6261 return latency;
6262 else
6263 return latency + (line_fill_time - wm->active_time);
6264
6265}
6266
6267/**
6268 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6269 * average and available dram bandwidth
6270 *
6271 * @wm: watermark calculation data
6272 *
6273 * Check if the display average bandwidth fits in the display
6274 * dram bandwidth (CIK).
6275 * Used for display watermark bandwidth calculations
6276 * Returns true if the display fits, false if not.
6277 */
6278static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6279{
6280 if (dce8_average_bandwidth(wm) <=
6281 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6282 return true;
6283 else
6284 return false;
6285}
6286
6287/**
6288 * dce8_average_bandwidth_vs_available_bandwidth - check
6289 * average and available bandwidth
6290 *
6291 * @wm: watermark calculation data
6292 *
6293 * Check if the display average bandwidth fits in the display
6294 * available bandwidth (CIK).
6295 * Used for display watermark bandwidth calculations
6296 * Returns true if the display fits, false if not.
6297 */
6298static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6299{
6300 if (dce8_average_bandwidth(wm) <=
6301 (dce8_available_bandwidth(wm) / wm->num_heads))
6302 return true;
6303 else
6304 return false;
6305}
6306
6307/**
6308 * dce8_check_latency_hiding - check latency hiding
6309 *
6310 * @wm: watermark calculation data
6311 *
6312 * Check latency hiding (CIK).
6313 * Used for display watermark bandwidth calculations
6314 * Returns true if the display fits, false if not.
6315 */
6316static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6317{
6318 u32 lb_partitions = wm->lb_size / wm->src_width;
6319 u32 line_time = wm->active_time + wm->blank_time;
6320 u32 latency_tolerant_lines;
6321 u32 latency_hiding;
6322 fixed20_12 a;
6323
6324 a.full = dfixed_const(1);
6325 if (wm->vsc.full > a.full)
6326 latency_tolerant_lines = 1;
6327 else {
6328 if (lb_partitions <= (wm->vtaps + 1))
6329 latency_tolerant_lines = 1;
6330 else
6331 latency_tolerant_lines = 2;
6332 }
6333
6334 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6335
6336 if (dce8_latency_watermark(wm) <= latency_hiding)
6337 return true;
6338 else
6339 return false;
6340}
6341
6342/**
6343 * dce8_program_watermarks - program display watermarks
6344 *
6345 * @rdev: radeon_device pointer
6346 * @radeon_crtc: the selected display controller
6347 * @lb_size: line buffer size
6348 * @num_heads: number of display controllers in use
6349 *
6350 * Calculate and program the display watermarks for the
6351 * selected display controller (CIK).
6352 */
6353static void dce8_program_watermarks(struct radeon_device *rdev,
6354 struct radeon_crtc *radeon_crtc,
6355 u32 lb_size, u32 num_heads)
6356{
6357 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6358 struct dce8_wm_params wm;
6359 u32 pixel_period;
6360 u32 line_time = 0;
6361 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6362 u32 tmp, wm_mask;
6363
6364 if (radeon_crtc->base.enabled && num_heads && mode) {
6365 pixel_period = 1000000 / (u32)mode->clock;
6366 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6367
6368 wm.yclk = rdev->pm.current_mclk * 10;
6369 wm.sclk = rdev->pm.current_sclk * 10;
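		/* current_mclk/current_sclk are assumed to be in 10 kHz units here,
		 * so multiplying by 10 yields kHz to match mode->clock (kHz).
		 */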
6370 wm.disp_clk = mode->clock;
6371 wm.src_width = mode->crtc_hdisplay;
6372 wm.active_time = mode->crtc_hdisplay * pixel_period;
6373 wm.blank_time = line_time - wm.active_time;
6374 wm.interlaced = false;
6375 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6376 wm.interlaced = true;
6377 wm.vsc = radeon_crtc->vsc;
6378 wm.vtaps = 1;
6379 if (radeon_crtc->rmx_type != RMX_OFF)
6380 wm.vtaps = 2;
6381 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6382 wm.lb_size = lb_size;
6383 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6384 wm.num_heads = num_heads;
6385
6386 /* set for high clocks */
6387 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6388 /* set for low clocks */
6389 /* wm.yclk = low clk; wm.sclk = low clk */
6390 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6391
6392 /* possibly force display priority to high */
6393 /* should really do this at mode validation time... */
6394 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6395 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6396 !dce8_check_latency_hiding(&wm) ||
6397 (rdev->disp_priority == 2)) {
6398 DRM_DEBUG_KMS("force priority to high\n");
6399 }
6400 }
6401
6402 /* select wm A */
6403 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6404 tmp = wm_mask;
6405 tmp &= ~LATENCY_WATERMARK_MASK(3);
6406 tmp |= LATENCY_WATERMARK_MASK(1);
6407 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6408 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6409 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6410 LATENCY_HIGH_WATERMARK(line_time)));
6411 /* select wm B */
6412 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6413 tmp &= ~LATENCY_WATERMARK_MASK(3);
6414 tmp |= LATENCY_WATERMARK_MASK(2);
6415 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6416 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6417 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6418 LATENCY_HIGH_WATERMARK(line_time)));
6419 /* restore original selection */
6420 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6421}
6422
6423/**
6424 * dce8_bandwidth_update - program display watermarks
6425 *
6426 * @rdev: radeon_device pointer
6427 *
6428 * Calculate and program the display watermarks and line
6429 * buffer allocation (CIK).
6430 */
6431void dce8_bandwidth_update(struct radeon_device *rdev)
6432{
6433 struct drm_display_mode *mode = NULL;
6434 u32 num_heads = 0, lb_size;
6435 int i;
6436
6437 radeon_update_display_priority(rdev);
6438
6439 for (i = 0; i < rdev->num_crtc; i++) {
6440 if (rdev->mode_info.crtcs[i]->base.enabled)
6441 num_heads++;
6442 }
6443 for (i = 0; i < rdev->num_crtc; i++) {
6444 mode = &rdev->mode_info.crtcs[i]->base.mode;
6445 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6446 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6447 }
6448}
Alex Deucher44fa3462012-12-18 22:17:00 -05006449
6450/**
6451 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6452 *
6453 * @rdev: radeon_device pointer
6454 *
6455 * Fetches a GPU clock counter snapshot (CIK).
6456 * Returns the 64 bit clock counter snapshot.
6457 */
6458uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6459{
6460 uint64_t clock;
6461
6462 mutex_lock(&rdev->gpu_clock_mutex);
6463 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6464 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6465 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6466 mutex_unlock(&rdev->gpu_clock_mutex);
6467 return clock;
6468}
6469
Christian König87167bb2013-04-09 13:39:21 -04006470static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6471 u32 cntl_reg, u32 status_reg)
6472{
6473 int r, i;
6474 struct atom_clock_dividers dividers;
6475 uint32_t tmp;
6476
6477 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6478 clock, false, &dividers);
6479 if (r)
6480 return r;
6481
6482 tmp = RREG32_SMC(cntl_reg);
6483 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6484 tmp |= dividers.post_divider;
6485 WREG32_SMC(cntl_reg, tmp);
6486
6487 for (i = 0; i < 100; i++) {
6488 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6489 break;
6490 mdelay(10);
6491 }
6492 if (i == 100)
6493 return -ETIMEDOUT;
6494
6495 return 0;
6496}
6497
6498int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6499{
6500 int r = 0;
6501
6502 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6503 if (r)
6504 return r;
6505
6506 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6507 return r;
6508}
6509
6510int cik_uvd_resume(struct radeon_device *rdev)
6511{
6512 uint64_t addr;
6513 uint32_t size;
6514 int r;
6515
6516 r = radeon_uvd_resume(rdev);
6517 if (r)
6518 return r;
6519
6520	/* program the VCPU memory controller bits 0-27 */
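	/* The VCPU address space is laid out back to back in the UVD buffer:
	 * firmware image, then stack, then heap.  The offsets and sizes below
	 * are programmed in units of 8 bytes (hence the >> 3 shifts).
	 */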
6521 addr = rdev->uvd.gpu_addr >> 3;
6522 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6523 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6524 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6525
6526 addr += size;
6527 size = RADEON_UVD_STACK_SIZE >> 3;
6528 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6529 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6530
6531 addr += size;
6532 size = RADEON_UVD_HEAP_SIZE >> 3;
6533 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6534 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6535
6536 /* bits 28-31 */
6537 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6538 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6539
6540 /* bits 32-39 */
6541 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6542 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6543
6544 return 0;
6545}