blob: 30b379e3f3f55d94483a69c947ba56d3e1c5b5eb [file] [log] [blame]
Alex Deucher43b3cd92012-03-20 17:18:00 -04001/*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include "drmP.h"
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "radeon_drm.h"
28#include "sid.h"
29#include "atom.h"
30
Alex Deucher0a96d722012-03-20 17:18:11 -040031extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
Alex Deucherc476dde2012-03-20 17:18:12 -040032extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
33extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
Alex Deucher0a96d722012-03-20 17:18:11 -040034
Alex Deucher1bd47d22012-03-20 17:18:10 -040035/* get temperature in millidegrees */
36int si_get_temp(struct radeon_device *rdev)
37{
38 u32 temp;
39 int actual_temp = 0;
40
41 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
42 CTF_TEMP_SHIFT;
43
44 if (temp & 0x200)
45 actual_temp = 255;
46 else
47 actual_temp = temp & 0x1ff;
48
49 actual_temp = (actual_temp * 1000);
50
51 return actual_temp;
52}
53
Alex Deucher43b3cd92012-03-20 17:18:00 -040054/* watermark setup */
55static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
56 struct radeon_crtc *radeon_crtc,
57 struct drm_display_mode *mode,
58 struct drm_display_mode *other_mode)
59{
60 u32 tmp;
61 /*
62 * Line Buffer Setup
63 * There are 3 line buffers, each one shared by 2 display controllers.
64 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
65 * the display controllers. The paritioning is done via one of four
66 * preset allocations specified in bits 21:20:
67 * 0 - half lb
68 * 2 - whole lb, other crtc must be disabled
69 */
70 /* this can get tricky if we have two large displays on a paired group
71 * of crtcs. Ideally for multiple large displays we'd assign them to
72 * non-linked crtcs for maximum line buffer allocation.
73 */
74 if (radeon_crtc->base.enabled && mode) {
75 if (other_mode)
76 tmp = 0; /* 1/2 */
77 else
78 tmp = 2; /* whole */
79 } else
80 tmp = 0;
81
82 WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
83 DC_LB_MEMORY_CONFIG(tmp));
84
85 if (radeon_crtc->base.enabled && mode) {
86 switch (tmp) {
87 case 0:
88 default:
89 return 4096 * 2;
90 case 2:
91 return 8192 * 2;
92 }
93 }
94
95 /* controller not enabled, so no lb used */
96 return 0;
97}
98
99static u32 dce6_get_number_of_dram_channels(struct radeon_device *rdev)
100{
101 u32 tmp = RREG32(MC_SHARED_CHMAP);
102
103 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
104 case 0:
105 default:
106 return 1;
107 case 1:
108 return 2;
109 case 2:
110 return 4;
111 case 3:
112 return 8;
113 case 4:
114 return 3;
115 case 5:
116 return 6;
117 case 6:
118 return 10;
119 case 7:
120 return 12;
121 case 8:
122 return 16;
123 }
124}
125
126struct dce6_wm_params {
127 u32 dram_channels; /* number of dram channels */
128 u32 yclk; /* bandwidth per dram data pin in kHz */
129 u32 sclk; /* engine clock in kHz */
130 u32 disp_clk; /* display clock in kHz */
131 u32 src_width; /* viewport width */
132 u32 active_time; /* active display time in ns */
133 u32 blank_time; /* blank time in ns */
134 bool interlaced; /* mode is interlaced */
135 fixed20_12 vsc; /* vertical scale ratio */
136 u32 num_heads; /* number of active crtcs */
137 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
138 u32 lb_size; /* line buffer allocated to pipe */
139 u32 vtaps; /* vertical scaler taps */
140};
141
142static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
143{
144 /* Calculate raw DRAM Bandwidth */
145 fixed20_12 dram_efficiency; /* 0.7 */
146 fixed20_12 yclk, dram_channels, bandwidth;
147 fixed20_12 a;
148
149 a.full = dfixed_const(1000);
150 yclk.full = dfixed_const(wm->yclk);
151 yclk.full = dfixed_div(yclk, a);
152 dram_channels.full = dfixed_const(wm->dram_channels * 4);
153 a.full = dfixed_const(10);
154 dram_efficiency.full = dfixed_const(7);
155 dram_efficiency.full = dfixed_div(dram_efficiency, a);
156 bandwidth.full = dfixed_mul(dram_channels, yclk);
157 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
158
159 return dfixed_trunc(bandwidth);
160}
161
162static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
163{
164 /* Calculate DRAM Bandwidth and the part allocated to display. */
165 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
166 fixed20_12 yclk, dram_channels, bandwidth;
167 fixed20_12 a;
168
169 a.full = dfixed_const(1000);
170 yclk.full = dfixed_const(wm->yclk);
171 yclk.full = dfixed_div(yclk, a);
172 dram_channels.full = dfixed_const(wm->dram_channels * 4);
173 a.full = dfixed_const(10);
174 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
175 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
176 bandwidth.full = dfixed_mul(dram_channels, yclk);
177 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
178
179 return dfixed_trunc(bandwidth);
180}
181
182static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
183{
184 /* Calculate the display Data return Bandwidth */
185 fixed20_12 return_efficiency; /* 0.8 */
186 fixed20_12 sclk, bandwidth;
187 fixed20_12 a;
188
189 a.full = dfixed_const(1000);
190 sclk.full = dfixed_const(wm->sclk);
191 sclk.full = dfixed_div(sclk, a);
192 a.full = dfixed_const(10);
193 return_efficiency.full = dfixed_const(8);
194 return_efficiency.full = dfixed_div(return_efficiency, a);
195 a.full = dfixed_const(32);
196 bandwidth.full = dfixed_mul(a, sclk);
197 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
198
199 return dfixed_trunc(bandwidth);
200}
201
202static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
203{
204 return 32;
205}
206
207static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
208{
209 /* Calculate the DMIF Request Bandwidth */
210 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
211 fixed20_12 disp_clk, sclk, bandwidth;
212 fixed20_12 a, b1, b2;
213 u32 min_bandwidth;
214
215 a.full = dfixed_const(1000);
216 disp_clk.full = dfixed_const(wm->disp_clk);
217 disp_clk.full = dfixed_div(disp_clk, a);
218 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
219 b1.full = dfixed_mul(a, disp_clk);
220
221 a.full = dfixed_const(1000);
222 sclk.full = dfixed_const(wm->sclk);
223 sclk.full = dfixed_div(sclk, a);
224 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
225 b2.full = dfixed_mul(a, sclk);
226
227 a.full = dfixed_const(10);
228 disp_clk_request_efficiency.full = dfixed_const(8);
229 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
230
231 min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
232
233 a.full = dfixed_const(min_bandwidth);
234 bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
235
236 return dfixed_trunc(bandwidth);
237}
238
239static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
240{
241 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
242 u32 dram_bandwidth = dce6_dram_bandwidth(wm);
243 u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
244 u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
245
246 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
247}
248
249static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
250{
251 /* Calculate the display mode Average Bandwidth
252 * DisplayMode should contain the source and destination dimensions,
253 * timing, etc.
254 */
255 fixed20_12 bpp;
256 fixed20_12 line_time;
257 fixed20_12 src_width;
258 fixed20_12 bandwidth;
259 fixed20_12 a;
260
261 a.full = dfixed_const(1000);
262 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
263 line_time.full = dfixed_div(line_time, a);
264 bpp.full = dfixed_const(wm->bytes_per_pixel);
265 src_width.full = dfixed_const(wm->src_width);
266 bandwidth.full = dfixed_mul(src_width, bpp);
267 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
268 bandwidth.full = dfixed_div(bandwidth, line_time);
269
270 return dfixed_trunc(bandwidth);
271}
272
273static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
274{
275 /* First calcualte the latency in ns */
276 u32 mc_latency = 2000; /* 2000 ns. */
277 u32 available_bandwidth = dce6_available_bandwidth(wm);
278 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
279 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
280 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
281 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
282 (wm->num_heads * cursor_line_pair_return_time);
283 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
284 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
285 u32 tmp, dmif_size = 12288;
286 fixed20_12 a, b, c;
287
288 if (wm->num_heads == 0)
289 return 0;
290
291 a.full = dfixed_const(2);
292 b.full = dfixed_const(1);
293 if ((wm->vsc.full > a.full) ||
294 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
295 (wm->vtaps >= 5) ||
296 ((wm->vsc.full >= a.full) && wm->interlaced))
297 max_src_lines_per_dst_line = 4;
298 else
299 max_src_lines_per_dst_line = 2;
300
301 a.full = dfixed_const(available_bandwidth);
302 b.full = dfixed_const(wm->num_heads);
303 a.full = dfixed_div(a, b);
304
305 b.full = dfixed_const(mc_latency + 512);
306 c.full = dfixed_const(wm->disp_clk);
307 b.full = dfixed_div(b, c);
308
309 c.full = dfixed_const(dmif_size);
310 b.full = dfixed_div(c, b);
311
312 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
313
314 b.full = dfixed_const(1000);
315 c.full = dfixed_const(wm->disp_clk);
316 b.full = dfixed_div(c, b);
317 c.full = dfixed_const(wm->bytes_per_pixel);
318 b.full = dfixed_mul(b, c);
319
320 lb_fill_bw = min(tmp, dfixed_trunc(b));
321
322 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
323 b.full = dfixed_const(1000);
324 c.full = dfixed_const(lb_fill_bw);
325 b.full = dfixed_div(c, b);
326 a.full = dfixed_div(a, b);
327 line_fill_time = dfixed_trunc(a);
328
329 if (line_fill_time < wm->active_time)
330 return latency;
331 else
332 return latency + (line_fill_time - wm->active_time);
333
334}
335
336static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
337{
338 if (dce6_average_bandwidth(wm) <=
339 (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
340 return true;
341 else
342 return false;
343};
344
345static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
346{
347 if (dce6_average_bandwidth(wm) <=
348 (dce6_available_bandwidth(wm) / wm->num_heads))
349 return true;
350 else
351 return false;
352};
353
354static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
355{
356 u32 lb_partitions = wm->lb_size / wm->src_width;
357 u32 line_time = wm->active_time + wm->blank_time;
358 u32 latency_tolerant_lines;
359 u32 latency_hiding;
360 fixed20_12 a;
361
362 a.full = dfixed_const(1);
363 if (wm->vsc.full > a.full)
364 latency_tolerant_lines = 1;
365 else {
366 if (lb_partitions <= (wm->vtaps + 1))
367 latency_tolerant_lines = 1;
368 else
369 latency_tolerant_lines = 2;
370 }
371
372 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
373
374 if (dce6_latency_watermark(wm) <= latency_hiding)
375 return true;
376 else
377 return false;
378}
379
380static void dce6_program_watermarks(struct radeon_device *rdev,
381 struct radeon_crtc *radeon_crtc,
382 u32 lb_size, u32 num_heads)
383{
384 struct drm_display_mode *mode = &radeon_crtc->base.mode;
385 struct dce6_wm_params wm;
386 u32 pixel_period;
387 u32 line_time = 0;
388 u32 latency_watermark_a = 0, latency_watermark_b = 0;
389 u32 priority_a_mark = 0, priority_b_mark = 0;
390 u32 priority_a_cnt = PRIORITY_OFF;
391 u32 priority_b_cnt = PRIORITY_OFF;
392 u32 tmp, arb_control3;
393 fixed20_12 a, b, c;
394
395 if (radeon_crtc->base.enabled && num_heads && mode) {
396 pixel_period = 1000000 / (u32)mode->clock;
397 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
398 priority_a_cnt = 0;
399 priority_b_cnt = 0;
400
401 wm.yclk = rdev->pm.current_mclk * 10;
402 wm.sclk = rdev->pm.current_sclk * 10;
403 wm.disp_clk = mode->clock;
404 wm.src_width = mode->crtc_hdisplay;
405 wm.active_time = mode->crtc_hdisplay * pixel_period;
406 wm.blank_time = line_time - wm.active_time;
407 wm.interlaced = false;
408 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
409 wm.interlaced = true;
410 wm.vsc = radeon_crtc->vsc;
411 wm.vtaps = 1;
412 if (radeon_crtc->rmx_type != RMX_OFF)
413 wm.vtaps = 2;
414 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
415 wm.lb_size = lb_size;
416 wm.dram_channels = dce6_get_number_of_dram_channels(rdev);
417 wm.num_heads = num_heads;
418
419 /* set for high clocks */
420 latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
421 /* set for low clocks */
422 /* wm.yclk = low clk; wm.sclk = low clk */
423 latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
424
425 /* possibly force display priority to high */
426 /* should really do this at mode validation time... */
427 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
428 !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
429 !dce6_check_latency_hiding(&wm) ||
430 (rdev->disp_priority == 2)) {
431 DRM_DEBUG_KMS("force priority to high\n");
432 priority_a_cnt |= PRIORITY_ALWAYS_ON;
433 priority_b_cnt |= PRIORITY_ALWAYS_ON;
434 }
435
436 a.full = dfixed_const(1000);
437 b.full = dfixed_const(mode->clock);
438 b.full = dfixed_div(b, a);
439 c.full = dfixed_const(latency_watermark_a);
440 c.full = dfixed_mul(c, b);
441 c.full = dfixed_mul(c, radeon_crtc->hsc);
442 c.full = dfixed_div(c, a);
443 a.full = dfixed_const(16);
444 c.full = dfixed_div(c, a);
445 priority_a_mark = dfixed_trunc(c);
446 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
447
448 a.full = dfixed_const(1000);
449 b.full = dfixed_const(mode->clock);
450 b.full = dfixed_div(b, a);
451 c.full = dfixed_const(latency_watermark_b);
452 c.full = dfixed_mul(c, b);
453 c.full = dfixed_mul(c, radeon_crtc->hsc);
454 c.full = dfixed_div(c, a);
455 a.full = dfixed_const(16);
456 c.full = dfixed_div(c, a);
457 priority_b_mark = dfixed_trunc(c);
458 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
459 }
460
461 /* select wm A */
462 arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
463 tmp = arb_control3;
464 tmp &= ~LATENCY_WATERMARK_MASK(3);
465 tmp |= LATENCY_WATERMARK_MASK(1);
466 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
467 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
468 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
469 LATENCY_HIGH_WATERMARK(line_time)));
470 /* select wm B */
471 tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
472 tmp &= ~LATENCY_WATERMARK_MASK(3);
473 tmp |= LATENCY_WATERMARK_MASK(2);
474 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
475 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
476 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
477 LATENCY_HIGH_WATERMARK(line_time)));
478 /* restore original selection */
479 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
480
481 /* write the priority marks */
482 WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
483 WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
484
485}
486
487void dce6_bandwidth_update(struct radeon_device *rdev)
488{
489 struct drm_display_mode *mode0 = NULL;
490 struct drm_display_mode *mode1 = NULL;
491 u32 num_heads = 0, lb_size;
492 int i;
493
494 radeon_update_display_priority(rdev);
495
496 for (i = 0; i < rdev->num_crtc; i++) {
497 if (rdev->mode_info.crtcs[i]->base.enabled)
498 num_heads++;
499 }
500 for (i = 0; i < rdev->num_crtc; i += 2) {
501 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
502 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
503 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
504 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
505 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
506 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
507 }
508}
509
Alex Deucher0a96d722012-03-20 17:18:11 -0400510/*
511 * Core functions
512 */
513static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
514 u32 num_tile_pipes,
515 u32 num_backends_per_asic,
516 u32 *backend_disable_mask_per_asic,
517 u32 num_shader_engines)
518{
519 u32 backend_map = 0;
520 u32 enabled_backends_mask = 0;
521 u32 enabled_backends_count = 0;
522 u32 num_backends_per_se;
523 u32 cur_pipe;
524 u32 swizzle_pipe[SI_MAX_PIPES];
525 u32 cur_backend = 0;
526 u32 i;
527 bool force_no_swizzle;
528
529 /* force legal values */
530 if (num_tile_pipes < 1)
531 num_tile_pipes = 1;
532 if (num_tile_pipes > rdev->config.si.max_tile_pipes)
533 num_tile_pipes = rdev->config.si.max_tile_pipes;
534 if (num_shader_engines < 1)
535 num_shader_engines = 1;
536 if (num_shader_engines > rdev->config.si.max_shader_engines)
537 num_shader_engines = rdev->config.si.max_shader_engines;
538 if (num_backends_per_asic < num_shader_engines)
539 num_backends_per_asic = num_shader_engines;
540 if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
541 num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
542
543 /* make sure we have the same number of backends per se */
544 num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
545 /* set up the number of backends per se */
546 num_backends_per_se = num_backends_per_asic / num_shader_engines;
547 if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
548 num_backends_per_se = rdev->config.si.max_backends_per_se;
549 num_backends_per_asic = num_backends_per_se * num_shader_engines;
550 }
551
552 /* create enable mask and count for enabled backends */
553 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
554 if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
555 enabled_backends_mask |= (1 << i);
556 ++enabled_backends_count;
557 }
558 if (enabled_backends_count == num_backends_per_asic)
559 break;
560 }
561
562 /* force the backends mask to match the current number of backends */
563 if (enabled_backends_count != num_backends_per_asic) {
564 u32 this_backend_enabled;
565 u32 shader_engine;
566 u32 backend_per_se;
567
568 enabled_backends_mask = 0;
569 enabled_backends_count = 0;
570 *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
571 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
572 /* calc the current se */
573 shader_engine = i / rdev->config.si.max_backends_per_se;
574 /* calc the backend per se */
575 backend_per_se = i % rdev->config.si.max_backends_per_se;
576 /* default to not enabled */
577 this_backend_enabled = 0;
578 if ((shader_engine < num_shader_engines) &&
579 (backend_per_se < num_backends_per_se))
580 this_backend_enabled = 1;
581 if (this_backend_enabled) {
582 enabled_backends_mask |= (1 << i);
583 *backend_disable_mask_per_asic &= ~(1 << i);
584 ++enabled_backends_count;
585 }
586 }
587 }
588
589
590 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
591 switch (rdev->family) {
592 case CHIP_TAHITI:
593 case CHIP_PITCAIRN:
594 case CHIP_VERDE:
595 force_no_swizzle = true;
596 break;
597 default:
598 force_no_swizzle = false;
599 break;
600 }
601 if (force_no_swizzle) {
602 bool last_backend_enabled = false;
603
604 force_no_swizzle = false;
605 for (i = 0; i < SI_MAX_BACKENDS; ++i) {
606 if (((enabled_backends_mask >> i) & 1) == 1) {
607 if (last_backend_enabled)
608 force_no_swizzle = true;
609 last_backend_enabled = true;
610 } else
611 last_backend_enabled = false;
612 }
613 }
614
615 switch (num_tile_pipes) {
616 case 1:
617 case 3:
618 case 5:
619 case 7:
620 DRM_ERROR("odd number of pipes!\n");
621 break;
622 case 2:
623 swizzle_pipe[0] = 0;
624 swizzle_pipe[1] = 1;
625 break;
626 case 4:
627 if (force_no_swizzle) {
628 swizzle_pipe[0] = 0;
629 swizzle_pipe[1] = 1;
630 swizzle_pipe[2] = 2;
631 swizzle_pipe[3] = 3;
632 } else {
633 swizzle_pipe[0] = 0;
634 swizzle_pipe[1] = 2;
635 swizzle_pipe[2] = 1;
636 swizzle_pipe[3] = 3;
637 }
638 break;
639 case 6:
640 if (force_no_swizzle) {
641 swizzle_pipe[0] = 0;
642 swizzle_pipe[1] = 1;
643 swizzle_pipe[2] = 2;
644 swizzle_pipe[3] = 3;
645 swizzle_pipe[4] = 4;
646 swizzle_pipe[5] = 5;
647 } else {
648 swizzle_pipe[0] = 0;
649 swizzle_pipe[1] = 2;
650 swizzle_pipe[2] = 4;
651 swizzle_pipe[3] = 1;
652 swizzle_pipe[4] = 3;
653 swizzle_pipe[5] = 5;
654 }
655 break;
656 case 8:
657 if (force_no_swizzle) {
658 swizzle_pipe[0] = 0;
659 swizzle_pipe[1] = 1;
660 swizzle_pipe[2] = 2;
661 swizzle_pipe[3] = 3;
662 swizzle_pipe[4] = 4;
663 swizzle_pipe[5] = 5;
664 swizzle_pipe[6] = 6;
665 swizzle_pipe[7] = 7;
666 } else {
667 swizzle_pipe[0] = 0;
668 swizzle_pipe[1] = 2;
669 swizzle_pipe[2] = 4;
670 swizzle_pipe[3] = 6;
671 swizzle_pipe[4] = 1;
672 swizzle_pipe[5] = 3;
673 swizzle_pipe[6] = 5;
674 swizzle_pipe[7] = 7;
675 }
676 break;
677 }
678
679 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
680 while (((1 << cur_backend) & enabled_backends_mask) == 0)
681 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
682
683 backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
684
685 cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
686 }
687
688 return backend_map;
689}
690
691static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
692 u32 disable_mask_per_se,
693 u32 max_disable_mask_per_se,
694 u32 num_shader_engines)
695{
696 u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
697 u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
698
699 if (num_shader_engines == 1)
700 return disable_mask_per_asic;
701 else if (num_shader_engines == 2)
702 return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
703 else
704 return 0xffffffff;
705}
706
707static void si_tiling_mode_table_init(struct radeon_device *rdev)
708{
709 const u32 num_tile_mode_states = 32;
710 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
711
712 switch (rdev->config.si.mem_row_size_in_kb) {
713 case 1:
714 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
715 break;
716 case 2:
717 default:
718 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
719 break;
720 case 4:
721 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
722 break;
723 }
724
725 if ((rdev->family == CHIP_TAHITI) ||
726 (rdev->family == CHIP_PITCAIRN)) {
727 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
728 switch (reg_offset) {
729 case 0: /* non-AA compressed depth or any compressed stencil */
730 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
731 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
732 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
734 NUM_BANKS(ADDR_SURF_16_BANK) |
735 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
738 break;
739 case 1: /* 2xAA/4xAA compressed depth only */
740 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
743 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
744 NUM_BANKS(ADDR_SURF_16_BANK) |
745 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
748 break;
749 case 2: /* 8xAA compressed depth only */
750 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
751 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
752 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
753 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
754 NUM_BANKS(ADDR_SURF_16_BANK) |
755 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
758 break;
759 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
760 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
761 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
762 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
764 NUM_BANKS(ADDR_SURF_16_BANK) |
765 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
768 break;
769 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
770 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
771 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
772 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
773 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
774 NUM_BANKS(ADDR_SURF_16_BANK) |
775 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
778 break;
779 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
780 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
781 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
782 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
783 TILE_SPLIT(split_equal_to_row_size) |
784 NUM_BANKS(ADDR_SURF_16_BANK) |
785 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
788 break;
789 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
790 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
791 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
792 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
793 TILE_SPLIT(split_equal_to_row_size) |
794 NUM_BANKS(ADDR_SURF_16_BANK) |
795 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
798 break;
799 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
800 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
801 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
802 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
803 TILE_SPLIT(split_equal_to_row_size) |
804 NUM_BANKS(ADDR_SURF_16_BANK) |
805 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
808 break;
809 case 8: /* 1D and 1D Array Surfaces */
810 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
811 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
814 NUM_BANKS(ADDR_SURF_16_BANK) |
815 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
816 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
817 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
818 break;
819 case 9: /* Displayable maps. */
820 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
821 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
822 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
824 NUM_BANKS(ADDR_SURF_16_BANK) |
825 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
828 break;
829 case 10: /* Display 8bpp. */
830 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
831 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
832 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
833 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
834 NUM_BANKS(ADDR_SURF_16_BANK) |
835 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
836 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
837 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
838 break;
839 case 11: /* Display 16bpp. */
840 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
841 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
842 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
843 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
844 NUM_BANKS(ADDR_SURF_16_BANK) |
845 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
848 break;
849 case 12: /* Display 32bpp. */
850 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
851 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
852 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
853 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
854 NUM_BANKS(ADDR_SURF_16_BANK) |
855 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
858 break;
859 case 13: /* Thin. */
860 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
861 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
862 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
863 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
864 NUM_BANKS(ADDR_SURF_16_BANK) |
865 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
868 break;
869 case 14: /* Thin 8 bpp. */
870 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
871 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
872 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
873 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
874 NUM_BANKS(ADDR_SURF_16_BANK) |
875 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
878 break;
879 case 15: /* Thin 16 bpp. */
880 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
881 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
882 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
883 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
884 NUM_BANKS(ADDR_SURF_16_BANK) |
885 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
888 break;
889 case 16: /* Thin 32 bpp. */
890 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
891 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
892 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
893 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
894 NUM_BANKS(ADDR_SURF_16_BANK) |
895 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
898 break;
899 case 17: /* Thin 64 bpp. */
900 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
901 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
902 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
903 TILE_SPLIT(split_equal_to_row_size) |
904 NUM_BANKS(ADDR_SURF_16_BANK) |
905 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
908 break;
909 case 21: /* 8 bpp PRT. */
910 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
911 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
912 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
913 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
914 NUM_BANKS(ADDR_SURF_16_BANK) |
915 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
918 break;
919 case 22: /* 16 bpp PRT */
920 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
921 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
922 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
923 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
924 NUM_BANKS(ADDR_SURF_16_BANK) |
925 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
928 break;
929 case 23: /* 32 bpp PRT */
930 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
931 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
932 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
933 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
934 NUM_BANKS(ADDR_SURF_16_BANK) |
935 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
938 break;
939 case 24: /* 64 bpp PRT */
940 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
941 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
942 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
943 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
944 NUM_BANKS(ADDR_SURF_16_BANK) |
945 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
948 break;
949 case 25: /* 128 bpp PRT */
950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
952 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
953 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
954 NUM_BANKS(ADDR_SURF_8_BANK) |
955 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
958 break;
959 default:
960 gb_tile_moden = 0;
961 break;
962 }
963 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
964 }
965 } else if (rdev->family == CHIP_VERDE) {
966 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
967 switch (reg_offset) {
968 case 0: /* non-AA compressed depth or any compressed stencil */
969 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
970 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
971 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
972 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
973 NUM_BANKS(ADDR_SURF_16_BANK) |
974 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
977 break;
978 case 1: /* 2xAA/4xAA compressed depth only */
979 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
980 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
981 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
983 NUM_BANKS(ADDR_SURF_16_BANK) |
984 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
987 break;
988 case 2: /* 8xAA compressed depth only */
989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
990 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
991 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
993 NUM_BANKS(ADDR_SURF_16_BANK) |
994 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
997 break;
998 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
999 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1000 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1001 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1002 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1003 NUM_BANKS(ADDR_SURF_16_BANK) |
1004 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1007 break;
1008 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1010 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1013 NUM_BANKS(ADDR_SURF_16_BANK) |
1014 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1017 break;
1018 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1019 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1020 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1021 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1022 TILE_SPLIT(split_equal_to_row_size) |
1023 NUM_BANKS(ADDR_SURF_16_BANK) |
1024 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1027 break;
1028 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1029 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1030 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1031 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1032 TILE_SPLIT(split_equal_to_row_size) |
1033 NUM_BANKS(ADDR_SURF_16_BANK) |
1034 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1035 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1036 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1037 break;
1038 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1039 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1040 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1041 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1042 TILE_SPLIT(split_equal_to_row_size) |
1043 NUM_BANKS(ADDR_SURF_16_BANK) |
1044 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1047 break;
1048 case 8: /* 1D and 1D Array Surfaces */
1049 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1050 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1051 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1052 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1053 NUM_BANKS(ADDR_SURF_16_BANK) |
1054 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1057 break;
1058 case 9: /* Displayable maps. */
1059 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1060 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1061 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1063 NUM_BANKS(ADDR_SURF_16_BANK) |
1064 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1067 break;
1068 case 10: /* Display 8bpp. */
1069 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1070 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1071 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1072 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1073 NUM_BANKS(ADDR_SURF_16_BANK) |
1074 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1077 break;
1078 case 11: /* Display 16bpp. */
1079 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1080 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1081 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1082 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1083 NUM_BANKS(ADDR_SURF_16_BANK) |
1084 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1087 break;
1088 case 12: /* Display 32bpp. */
1089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1091 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1093 NUM_BANKS(ADDR_SURF_16_BANK) |
1094 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1097 break;
1098 case 13: /* Thin. */
1099 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1100 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1101 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1102 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1103 NUM_BANKS(ADDR_SURF_16_BANK) |
1104 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1107 break;
1108 case 14: /* Thin 8 bpp. */
1109 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1110 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1111 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1113 NUM_BANKS(ADDR_SURF_16_BANK) |
1114 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1115 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1116 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1117 break;
1118 case 15: /* Thin 16 bpp. */
1119 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1120 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1121 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1123 NUM_BANKS(ADDR_SURF_16_BANK) |
1124 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1127 break;
1128 case 16: /* Thin 32 bpp. */
1129 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1130 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1131 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1133 NUM_BANKS(ADDR_SURF_16_BANK) |
1134 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1137 break;
1138 case 17: /* Thin 64 bpp. */
1139 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1140 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1141 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1142 TILE_SPLIT(split_equal_to_row_size) |
1143 NUM_BANKS(ADDR_SURF_16_BANK) |
1144 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1147 break;
1148 case 21: /* 8 bpp PRT. */
1149 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1150 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1151 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1152 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1153 NUM_BANKS(ADDR_SURF_16_BANK) |
1154 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1157 break;
1158 case 22: /* 16 bpp PRT */
1159 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1160 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1161 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1162 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1163 NUM_BANKS(ADDR_SURF_16_BANK) |
1164 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1167 break;
1168 case 23: /* 32 bpp PRT */
1169 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1171 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1173 NUM_BANKS(ADDR_SURF_16_BANK) |
1174 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1177 break;
1178 case 24: /* 64 bpp PRT */
1179 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1180 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1181 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1183 NUM_BANKS(ADDR_SURF_16_BANK) |
1184 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1187 break;
1188 case 25: /* 128 bpp PRT */
1189 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1190 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1191 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1192 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1193 NUM_BANKS(ADDR_SURF_8_BANK) |
1194 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1197 break;
1198 default:
1199 gb_tile_moden = 0;
1200 break;
1201 }
1202 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1203 }
1204 } else
1205 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1206}
1207
1208static void si_gpu_init(struct radeon_device *rdev)
1209{
1210 u32 cc_rb_backend_disable = 0;
1211 u32 cc_gc_shader_array_config;
1212 u32 gb_addr_config = 0;
1213 u32 mc_shared_chmap, mc_arb_ramcfg;
1214 u32 gb_backend_map;
1215 u32 cgts_tcc_disable;
1216 u32 sx_debug_1;
1217 u32 gc_user_shader_array_config;
1218 u32 gc_user_rb_backend_disable;
1219 u32 cgts_user_tcc_disable;
1220 u32 hdp_host_path_cntl;
1221 u32 tmp;
1222 int i, j;
1223
1224 switch (rdev->family) {
1225 case CHIP_TAHITI:
1226 rdev->config.si.max_shader_engines = 2;
1227 rdev->config.si.max_pipes_per_simd = 4;
1228 rdev->config.si.max_tile_pipes = 12;
1229 rdev->config.si.max_simds_per_se = 8;
1230 rdev->config.si.max_backends_per_se = 4;
1231 rdev->config.si.max_texture_channel_caches = 12;
1232 rdev->config.si.max_gprs = 256;
1233 rdev->config.si.max_gs_threads = 32;
1234 rdev->config.si.max_hw_contexts = 8;
1235
1236 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1237 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1238 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1239 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1240 break;
1241 case CHIP_PITCAIRN:
1242 rdev->config.si.max_shader_engines = 2;
1243 rdev->config.si.max_pipes_per_simd = 4;
1244 rdev->config.si.max_tile_pipes = 8;
1245 rdev->config.si.max_simds_per_se = 5;
1246 rdev->config.si.max_backends_per_se = 4;
1247 rdev->config.si.max_texture_channel_caches = 8;
1248 rdev->config.si.max_gprs = 256;
1249 rdev->config.si.max_gs_threads = 32;
1250 rdev->config.si.max_hw_contexts = 8;
1251
1252 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1253 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1254 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1255 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1256 break;
1257 case CHIP_VERDE:
1258 default:
1259 rdev->config.si.max_shader_engines = 1;
1260 rdev->config.si.max_pipes_per_simd = 4;
1261 rdev->config.si.max_tile_pipes = 4;
1262 rdev->config.si.max_simds_per_se = 2;
1263 rdev->config.si.max_backends_per_se = 4;
1264 rdev->config.si.max_texture_channel_caches = 4;
1265 rdev->config.si.max_gprs = 256;
1266 rdev->config.si.max_gs_threads = 32;
1267 rdev->config.si.max_hw_contexts = 8;
1268
1269 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1270 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1271 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1272 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1273 break;
1274 }
1275
1276 /* Initialize HDP */
1277 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1278 WREG32((0x2c14 + j), 0x00000000);
1279 WREG32((0x2c18 + j), 0x00000000);
1280 WREG32((0x2c1c + j), 0x00000000);
1281 WREG32((0x2c20 + j), 0x00000000);
1282 WREG32((0x2c24 + j), 0x00000000);
1283 }
1284
1285 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1286
1287 evergreen_fix_pci_max_read_req_size(rdev);
1288
1289 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1290
1291 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1292 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1293
1294 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
1295 cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1296 cgts_tcc_disable = 0xffff0000;
1297 for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
1298 cgts_tcc_disable &= ~(1 << (16 + i));
1299 gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
1300 gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1301 cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
1302
1303 rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
1304 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1305 tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1306 rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
1307 tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
1308 rdev->config.si.backend_disable_mask_per_asic =
1309 si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
1310 rdev->config.si.num_shader_engines);
1311 rdev->config.si.backend_map =
1312 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1313 rdev->config.si.num_backends_per_se *
1314 rdev->config.si.num_shader_engines,
1315 &rdev->config.si.backend_disable_mask_per_asic,
1316 rdev->config.si.num_shader_engines);
1317 tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
1318 rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
1319 rdev->config.si.mem_max_burst_length_bytes = 256;
1320 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1321 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1322 if (rdev->config.si.mem_row_size_in_kb > 4)
1323 rdev->config.si.mem_row_size_in_kb = 4;
1324 /* XXX use MC settings? */
1325 rdev->config.si.shader_engine_tile_size = 32;
1326 rdev->config.si.num_gpus = 1;
1327 rdev->config.si.multi_gpu_tile_size = 64;
1328
1329 gb_addr_config = 0;
1330 switch (rdev->config.si.num_tile_pipes) {
1331 case 1:
1332 gb_addr_config |= NUM_PIPES(0);
1333 break;
1334 case 2:
1335 gb_addr_config |= NUM_PIPES(1);
1336 break;
1337 case 4:
1338 gb_addr_config |= NUM_PIPES(2);
1339 break;
1340 case 8:
1341 default:
1342 gb_addr_config |= NUM_PIPES(3);
1343 break;
1344 }
1345
1346 tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
1347 gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
1348 gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
1349 tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
1350 gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
1351 switch (rdev->config.si.num_gpus) {
1352 case 1:
1353 default:
1354 gb_addr_config |= NUM_GPUS(0);
1355 break;
1356 case 2:
1357 gb_addr_config |= NUM_GPUS(1);
1358 break;
1359 case 4:
1360 gb_addr_config |= NUM_GPUS(2);
1361 break;
1362 }
1363 switch (rdev->config.si.multi_gpu_tile_size) {
1364 case 16:
1365 gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
1366 break;
1367 case 32:
1368 default:
1369 gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
1370 break;
1371 case 64:
1372 gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
1373 break;
1374 case 128:
1375 gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
1376 break;
1377 }
1378 switch (rdev->config.si.mem_row_size_in_kb) {
1379 case 1:
1380 default:
1381 gb_addr_config |= ROW_SIZE(0);
1382 break;
1383 case 2:
1384 gb_addr_config |= ROW_SIZE(1);
1385 break;
1386 case 4:
1387 gb_addr_config |= ROW_SIZE(2);
1388 break;
1389 }
1390
1391 tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
1392 rdev->config.si.num_tile_pipes = (1 << tmp);
1393 tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
1394 rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
1395 tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
1396 rdev->config.si.num_shader_engines = tmp + 1;
1397 tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
1398 rdev->config.si.num_gpus = tmp + 1;
1399 tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
1400 rdev->config.si.multi_gpu_tile_size = 1 << tmp;
1401 tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
1402 rdev->config.si.mem_row_size_in_kb = 1 << tmp;
1403
1404 gb_backend_map =
1405 si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
1406 rdev->config.si.num_backends_per_se *
1407 rdev->config.si.num_shader_engines,
1408 &rdev->config.si.backend_disable_mask_per_asic,
1409 rdev->config.si.num_shader_engines);
1410
1411 /* setup tiling info dword. gb_addr_config is not adequate since it does
1412 * not have bank info, so create a custom tiling dword.
1413 * bits 3:0 num_pipes
1414 * bits 7:4 num_banks
1415 * bits 11:8 group_size
1416 * bits 15:12 row_size
1417 */
1418 rdev->config.si.tile_config = 0;
1419 switch (rdev->config.si.num_tile_pipes) {
1420 case 1:
1421 rdev->config.si.tile_config |= (0 << 0);
1422 break;
1423 case 2:
1424 rdev->config.si.tile_config |= (1 << 0);
1425 break;
1426 case 4:
1427 rdev->config.si.tile_config |= (2 << 0);
1428 break;
1429 case 8:
1430 default:
1431 /* XXX what about 12? */
1432 rdev->config.si.tile_config |= (3 << 0);
1433 break;
1434 }
1435 rdev->config.si.tile_config |=
1436 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
1437 rdev->config.si.tile_config |=
1438 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1439 rdev->config.si.tile_config |=
1440 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1441
1442 rdev->config.si.backend_map = gb_backend_map;
1443 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1444 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1445 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1446
1447 /* primary versions */
1448 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1449 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1450 WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1451
1452 WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1453
1454 /* user versions */
1455 WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1456 WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
1457 WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
1458
1459 WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1460
1461 si_tiling_mode_table_init(rdev);
1462
1463 /* set HW defaults for 3D engine */
1464 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1465 ROQ_IB2_START(0x2b)));
1466 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1467
1468 sx_debug_1 = RREG32(SX_DEBUG_1);
1469 WREG32(SX_DEBUG_1, sx_debug_1);
1470
1471 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1472
1473 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1474 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1475 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1476 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1477
1478 WREG32(VGT_NUM_INSTANCES, 1);
1479
1480 WREG32(CP_PERFMON_CNTL, 0);
1481
1482 WREG32(SQ_CONFIG, 0);
1483
1484 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1485 FORCE_EOV_MAX_REZ_CNT(255)));
1486
1487 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1488 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1489
1490 WREG32(VGT_GS_VERTEX_REUSE, 16);
1491 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1492
1493 WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1494 WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1495 WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1496 WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1497 WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1498 WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1499 WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1500 WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1501
1502 tmp = RREG32(HDP_MISC_CNTL);
1503 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1504 WREG32(HDP_MISC_CNTL, tmp);
1505
1506 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1507 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1508
1509 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1510
1511 udelay(50);
1512}
Alex Deucherc476dde2012-03-20 17:18:12 -04001513
1514bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1515{
1516 u32 srbm_status;
1517 u32 grbm_status, grbm_status2;
1518 u32 grbm_status_se0, grbm_status_se1;
1519 struct r100_gpu_lockup *lockup = &rdev->config.si.lockup;
1520 int r;
1521
1522 srbm_status = RREG32(SRBM_STATUS);
1523 grbm_status = RREG32(GRBM_STATUS);
1524 grbm_status2 = RREG32(GRBM_STATUS2);
1525 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
1526 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
1527 if (!(grbm_status & GUI_ACTIVE)) {
1528 r100_gpu_lockup_update(lockup, ring);
1529 return false;
1530 }
1531 /* force CP activities */
1532 r = radeon_ring_lock(rdev, ring, 2);
1533 if (!r) {
1534 /* PACKET2 NOP */
1535 radeon_ring_write(ring, 0x80000000);
1536 radeon_ring_write(ring, 0x80000000);
1537 radeon_ring_unlock_commit(rdev, ring);
1538 }
1539 /* XXX deal with CP0,1,2 */
1540 ring->rptr = RREG32(ring->rptr_reg);
1541 return r100_gpu_cp_is_lockup(rdev, lockup, ring);
1542}
1543
1544static int si_gpu_soft_reset(struct radeon_device *rdev)
1545{
1546 struct evergreen_mc_save save;
1547 u32 grbm_reset = 0;
1548
1549 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
1550 return 0;
1551
1552 dev_info(rdev->dev, "GPU softreset \n");
1553 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1554 RREG32(GRBM_STATUS));
1555 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1556 RREG32(GRBM_STATUS2));
1557 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1558 RREG32(GRBM_STATUS_SE0));
1559 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1560 RREG32(GRBM_STATUS_SE1));
1561 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1562 RREG32(SRBM_STATUS));
1563 evergreen_mc_stop(rdev, &save);
1564 if (radeon_mc_wait_for_idle(rdev)) {
1565 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1566 }
1567 /* Disable CP parsing/prefetching */
1568 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
1569
1570 /* reset all the gfx blocks */
1571 grbm_reset = (SOFT_RESET_CP |
1572 SOFT_RESET_CB |
1573 SOFT_RESET_DB |
1574 SOFT_RESET_GDS |
1575 SOFT_RESET_PA |
1576 SOFT_RESET_SC |
1577 SOFT_RESET_SPI |
1578 SOFT_RESET_SX |
1579 SOFT_RESET_TC |
1580 SOFT_RESET_TA |
1581 SOFT_RESET_VGT |
1582 SOFT_RESET_IA);
1583
1584 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
1585 WREG32(GRBM_SOFT_RESET, grbm_reset);
1586 (void)RREG32(GRBM_SOFT_RESET);
1587 udelay(50);
1588 WREG32(GRBM_SOFT_RESET, 0);
1589 (void)RREG32(GRBM_SOFT_RESET);
1590 /* Wait a little for things to settle down */
1591 udelay(50);
1592 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1593 RREG32(GRBM_STATUS));
1594 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1595 RREG32(GRBM_STATUS2));
1596 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1597 RREG32(GRBM_STATUS_SE0));
1598 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1599 RREG32(GRBM_STATUS_SE1));
1600 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1601 RREG32(SRBM_STATUS));
1602 evergreen_mc_resume(rdev, &save);
1603 return 0;
1604}
1605
/*
 * si_asic_reset - reset entry point for SI parts.
 * @rdev: radeon device
 *
 * Delegates to si_gpu_soft_reset() and hands back its return value.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	int status = si_gpu_soft_reset(rdev);

	return status;
}
1610
/* MC (memory controller) */
1612static void si_mc_program(struct radeon_device *rdev)
1613{
1614 struct evergreen_mc_save save;
1615 u32 tmp;
1616 int i, j;
1617
1618 /* Initialize HDP */
1619 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1620 WREG32((0x2c14 + j), 0x00000000);
1621 WREG32((0x2c18 + j), 0x00000000);
1622 WREG32((0x2c1c + j), 0x00000000);
1623 WREG32((0x2c20 + j), 0x00000000);
1624 WREG32((0x2c24 + j), 0x00000000);
1625 }
1626 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
1627
1628 evergreen_mc_stop(rdev, &save);
1629 if (radeon_mc_wait_for_idle(rdev)) {
1630 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1631 }
1632 /* Lockout access through VGA aperture*/
1633 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
1634 /* Update configuration */
1635 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
1636 rdev->mc.vram_start >> 12);
1637 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
1638 rdev->mc.vram_end >> 12);
1639 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
1640 rdev->vram_scratch.gpu_addr >> 12);
1641 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
1642 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
1643 WREG32(MC_VM_FB_LOCATION, tmp);
1644 /* XXX double check these! */
1645 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
1646 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
1647 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
1648 WREG32(MC_VM_AGP_BASE, 0);
1649 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
1650 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
1651 if (radeon_mc_wait_for_idle(rdev)) {
1652 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1653 }
1654 evergreen_mc_resume(rdev, &save);
1655 /* we need to own VRAM, so turn off the VGA renderer here
1656 * to stop it overwriting our objects */
1657 rv515_vga_render_disable(rdev);
1658}
1659
1660/* SI MC address space is 40 bits */
1661static void si_vram_location(struct radeon_device *rdev,
1662 struct radeon_mc *mc, u64 base)
1663{
1664 mc->vram_start = base;
1665 if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
1666 dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
1667 mc->real_vram_size = mc->aper_size;
1668 mc->mc_vram_size = mc->aper_size;
1669 }
1670 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
1671 dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
1672 mc->mc_vram_size >> 20, mc->vram_start,
1673 mc->vram_end, mc->real_vram_size >> 20);
1674}
1675
1676static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
1677{
1678 u64 size_af, size_bf;
1679
1680 size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
1681 size_bf = mc->vram_start & ~mc->gtt_base_align;
1682 if (size_bf > size_af) {
1683 if (mc->gtt_size > size_bf) {
1684 dev_warn(rdev->dev, "limiting GTT\n");
1685 mc->gtt_size = size_bf;
1686 }
1687 mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
1688 } else {
1689 if (mc->gtt_size > size_af) {
1690 dev_warn(rdev->dev, "limiting GTT\n");
1691 mc->gtt_size = size_af;
1692 }
1693 mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
1694 }
1695 mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
1696 dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
1697 mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
1698}
1699
1700static void si_vram_gtt_location(struct radeon_device *rdev,
1701 struct radeon_mc *mc)
1702{
1703 if (mc->mc_vram_size > 0xFFC0000000ULL) {
1704 /* leave room for at least 1024M GTT */
1705 dev_warn(rdev->dev, "limiting VRAM\n");
1706 mc->real_vram_size = 0xFFC0000000ULL;
1707 mc->mc_vram_size = 0xFFC0000000ULL;
1708 }
1709 si_vram_location(rdev, &rdev->mc, 0);
1710 rdev->mc.gtt_base_align = 0;
1711 si_gtt_location(rdev, mc);
1712}
1713
1714static int si_mc_init(struct radeon_device *rdev)
1715{
1716 u32 tmp;
1717 int chansize, numchan;
1718
1719 /* Get VRAM informations */
1720 rdev->mc.vram_is_ddr = true;
1721 tmp = RREG32(MC_ARB_RAMCFG);
1722 if (tmp & CHANSIZE_OVERRIDE) {
1723 chansize = 16;
1724 } else if (tmp & CHANSIZE_MASK) {
1725 chansize = 64;
1726 } else {
1727 chansize = 32;
1728 }
1729 tmp = RREG32(MC_SHARED_CHMAP);
1730 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1731 case 0:
1732 default:
1733 numchan = 1;
1734 break;
1735 case 1:
1736 numchan = 2;
1737 break;
1738 case 2:
1739 numchan = 4;
1740 break;
1741 case 3:
1742 numchan = 8;
1743 break;
1744 case 4:
1745 numchan = 3;
1746 break;
1747 case 5:
1748 numchan = 6;
1749 break;
1750 case 6:
1751 numchan = 10;
1752 break;
1753 case 7:
1754 numchan = 12;
1755 break;
1756 case 8:
1757 numchan = 16;
1758 break;
1759 }
1760 rdev->mc.vram_width = numchan * chansize;
1761 /* Could aper size report 0 ? */
1762 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
1763 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
1764 /* size in MB on si */
1765 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1766 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1767 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1768 si_vram_gtt_location(rdev, &rdev->mc);
1769 radeon_update_bandwidth_info(rdev);
1770
1771 return 0;
1772}
1773
1774/*
1775 * GART
1776 */
1777void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
1778{
1779 /* flush hdp cache */
1780 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1781
1782 /* bits 0-15 are the VM contexts0-15 */
1783 WREG32(VM_INVALIDATE_REQUEST, 1);
1784}
1785
1786int si_pcie_gart_enable(struct radeon_device *rdev)
1787{
1788 int r, i;
1789
1790 if (rdev->gart.robj == NULL) {
1791 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
1792 return -EINVAL;
1793 }
1794 r = radeon_gart_table_vram_pin(rdev);
1795 if (r)
1796 return r;
1797 radeon_gart_restore(rdev);
1798 /* Setup TLB control */
1799 WREG32(MC_VM_MX_L1_TLB_CNTL,
1800 (0xA << 7) |
1801 ENABLE_L1_TLB |
1802 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1803 ENABLE_ADVANCED_DRIVER_MODEL |
1804 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1805 /* Setup L2 cache */
1806 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1807 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1808 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1809 EFFECTIVE_L2_QUEUE_SIZE(7) |
1810 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1811 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1812 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1813 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
1814 /* setup context0 */
1815 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
1816 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
1817 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
1818 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
1819 (u32)(rdev->dummy_page.addr >> 12));
1820 WREG32(VM_CONTEXT0_CNTL2, 0);
1821 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1822 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
1823
1824 WREG32(0x15D4, 0);
1825 WREG32(0x15D8, 0);
1826 WREG32(0x15DC, 0);
1827
1828 /* empty context1-15 */
1829 /* FIXME start with 1G, once using 2 level pt switch to full
1830 * vm size space
1831 */
1832 /* set vm size, must be a multiple of 4 */
1833 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
1834 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
1835 for (i = 1; i < 16; i++) {
1836 if (i < 8)
1837 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
1838 rdev->gart.table_addr >> 12);
1839 else
1840 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
1841 rdev->gart.table_addr >> 12);
1842 }
1843
1844 /* enable context1-15 */
1845 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
1846 (u32)(rdev->dummy_page.addr >> 12));
1847 WREG32(VM_CONTEXT1_CNTL2, 0);
1848 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1849 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
1850
1851 si_pcie_gart_tlb_flush(rdev);
1852 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1853 (unsigned)(rdev->mc.gtt_size >> 20),
1854 (unsigned long long)rdev->gart.table_addr);
1855 rdev->gart.ready = true;
1856 return 0;
1857}
1858
1859void si_pcie_gart_disable(struct radeon_device *rdev)
1860{
1861 /* Disable all tables */
1862 WREG32(VM_CONTEXT0_CNTL, 0);
1863 WREG32(VM_CONTEXT1_CNTL, 0);
1864 /* Setup TLB control */
1865 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1866 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1867 /* Setup L2 cache */
1868 WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1869 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1870 EFFECTIVE_L2_QUEUE_SIZE(7) |
1871 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1872 WREG32(VM_L2_CNTL2, 0);
1873 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1874 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
1875 radeon_gart_table_vram_unpin(rdev);
1876}
1877
/*
 * si_pcie_gart_fini - tear down the PCIE GART.
 *
 * @rdev: radeon device
 *
 * Disables the GART first, then frees the table and finally calls the
 * common GART teardown.
 */
void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* stop the hardware from using the table before it goes away */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1884
/* vm parser */
1886static bool si_vm_reg_valid(u32 reg)
1887{
1888 /* context regs are fine */
1889 if (reg >= 0x28000)
1890 return true;
1891
1892 /* check config regs */
1893 switch (reg) {
1894 case GRBM_GFX_INDEX:
1895 case VGT_VTX_VECT_EJECT_REG:
1896 case VGT_CACHE_INVALIDATION:
1897 case VGT_ESGS_RING_SIZE:
1898 case VGT_GSVS_RING_SIZE:
1899 case VGT_GS_VERTEX_REUSE:
1900 case VGT_PRIMITIVE_TYPE:
1901 case VGT_INDEX_TYPE:
1902 case VGT_NUM_INDICES:
1903 case VGT_NUM_INSTANCES:
1904 case VGT_TF_RING_SIZE:
1905 case VGT_HS_OFFCHIP_PARAM:
1906 case VGT_TF_MEMORY_BASE:
1907 case PA_CL_ENHANCE:
1908 case PA_SU_LINE_STIPPLE_VALUE:
1909 case PA_SC_LINE_STIPPLE_STATE:
1910 case PA_SC_ENHANCE:
1911 case SQC_CACHES:
1912 case SPI_STATIC_THREAD_MGMT_1:
1913 case SPI_STATIC_THREAD_MGMT_2:
1914 case SPI_STATIC_THREAD_MGMT_3:
1915 case SPI_PS_MAX_WAVE_ID:
1916 case SPI_CONFIG_CNTL:
1917 case SPI_CONFIG_CNTL_1:
1918 case TA_CNTL_AUX:
1919 return true;
1920 default:
1921 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
1922 return false;
1923 }
1924}
1925
1926static int si_vm_packet3_ce_check(struct radeon_device *rdev,
1927 u32 *ib, struct radeon_cs_packet *pkt)
1928{
1929 switch (pkt->opcode) {
1930 case PACKET3_NOP:
1931 case PACKET3_SET_BASE:
1932 case PACKET3_SET_CE_DE_COUNTERS:
1933 case PACKET3_LOAD_CONST_RAM:
1934 case PACKET3_WRITE_CONST_RAM:
1935 case PACKET3_WRITE_CONST_RAM_OFFSET:
1936 case PACKET3_DUMP_CONST_RAM:
1937 case PACKET3_INCREMENT_CE_COUNTER:
1938 case PACKET3_WAIT_ON_DE_COUNTER:
1939 case PACKET3_CE_WRITE:
1940 break;
1941 default:
1942 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
1943 return -EINVAL;
1944 }
1945 return 0;
1946}
1947
1948static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
1949 u32 *ib, struct radeon_cs_packet *pkt)
1950{
1951 u32 idx = pkt->idx + 1;
1952 u32 idx_value = ib[idx];
1953 u32 start_reg, end_reg, reg, i;
1954
1955 switch (pkt->opcode) {
1956 case PACKET3_NOP:
1957 case PACKET3_SET_BASE:
1958 case PACKET3_CLEAR_STATE:
1959 case PACKET3_INDEX_BUFFER_SIZE:
1960 case PACKET3_DISPATCH_DIRECT:
1961 case PACKET3_DISPATCH_INDIRECT:
1962 case PACKET3_ALLOC_GDS:
1963 case PACKET3_WRITE_GDS_RAM:
1964 case PACKET3_ATOMIC_GDS:
1965 case PACKET3_ATOMIC:
1966 case PACKET3_OCCLUSION_QUERY:
1967 case PACKET3_SET_PREDICATION:
1968 case PACKET3_COND_EXEC:
1969 case PACKET3_PRED_EXEC:
1970 case PACKET3_DRAW_INDIRECT:
1971 case PACKET3_DRAW_INDEX_INDIRECT:
1972 case PACKET3_INDEX_BASE:
1973 case PACKET3_DRAW_INDEX_2:
1974 case PACKET3_CONTEXT_CONTROL:
1975 case PACKET3_INDEX_TYPE:
1976 case PACKET3_DRAW_INDIRECT_MULTI:
1977 case PACKET3_DRAW_INDEX_AUTO:
1978 case PACKET3_DRAW_INDEX_IMMD:
1979 case PACKET3_NUM_INSTANCES:
1980 case PACKET3_DRAW_INDEX_MULTI_AUTO:
1981 case PACKET3_STRMOUT_BUFFER_UPDATE:
1982 case PACKET3_DRAW_INDEX_OFFSET_2:
1983 case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
1984 case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
1985 case PACKET3_MPEG_INDEX:
1986 case PACKET3_WAIT_REG_MEM:
1987 case PACKET3_MEM_WRITE:
1988 case PACKET3_PFP_SYNC_ME:
1989 case PACKET3_SURFACE_SYNC:
1990 case PACKET3_EVENT_WRITE:
1991 case PACKET3_EVENT_WRITE_EOP:
1992 case PACKET3_EVENT_WRITE_EOS:
1993 case PACKET3_SET_CONTEXT_REG:
1994 case PACKET3_SET_CONTEXT_REG_INDIRECT:
1995 case PACKET3_SET_SH_REG:
1996 case PACKET3_SET_SH_REG_OFFSET:
1997 case PACKET3_INCREMENT_DE_COUNTER:
1998 case PACKET3_WAIT_ON_CE_COUNTER:
1999 case PACKET3_WAIT_ON_AVAIL_BUFFER:
2000 case PACKET3_ME_WRITE:
2001 break;
2002 case PACKET3_COPY_DATA:
2003 if ((idx_value & 0xf00) == 0) {
2004 reg = ib[idx + 3] * 4;
2005 if (!si_vm_reg_valid(reg))
2006 return -EINVAL;
2007 }
2008 break;
2009 case PACKET3_WRITE_DATA:
2010 if ((idx_value & 0xf00) == 0) {
2011 start_reg = ib[idx + 1] * 4;
2012 if (idx_value & 0x10000) {
2013 if (!si_vm_reg_valid(start_reg))
2014 return -EINVAL;
2015 } else {
2016 for (i = 0; i < (pkt->count - 2); i++) {
2017 reg = start_reg + (4 * i);
2018 if (!si_vm_reg_valid(reg))
2019 return -EINVAL;
2020 }
2021 }
2022 }
2023 break;
2024 case PACKET3_COND_WRITE:
2025 if (idx_value & 0x100) {
2026 reg = ib[idx + 5] * 4;
2027 if (!si_vm_reg_valid(reg))
2028 return -EINVAL;
2029 }
2030 break;
2031 case PACKET3_COPY_DW:
2032 if (idx_value & 0x2) {
2033 reg = ib[idx + 3] * 4;
2034 if (!si_vm_reg_valid(reg))
2035 return -EINVAL;
2036 }
2037 break;
2038 case PACKET3_SET_CONFIG_REG:
2039 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2040 end_reg = 4 * pkt->count + start_reg - 4;
2041 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2042 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2043 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2044 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2045 return -EINVAL;
2046 }
2047 for (i = 0; i < pkt->count; i++) {
2048 reg = start_reg + (4 * i);
2049 if (!si_vm_reg_valid(reg))
2050 return -EINVAL;
2051 }
2052 break;
2053 default:
2054 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2055 return -EINVAL;
2056 }
2057 return 0;
2058}
2059
2060static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2061 u32 *ib, struct radeon_cs_packet *pkt)
2062{
2063 u32 idx = pkt->idx + 1;
2064 u32 idx_value = ib[idx];
2065 u32 start_reg, reg, i;
2066
2067 switch (pkt->opcode) {
2068 case PACKET3_NOP:
2069 case PACKET3_SET_BASE:
2070 case PACKET3_CLEAR_STATE:
2071 case PACKET3_DISPATCH_DIRECT:
2072 case PACKET3_DISPATCH_INDIRECT:
2073 case PACKET3_ALLOC_GDS:
2074 case PACKET3_WRITE_GDS_RAM:
2075 case PACKET3_ATOMIC_GDS:
2076 case PACKET3_ATOMIC:
2077 case PACKET3_OCCLUSION_QUERY:
2078 case PACKET3_SET_PREDICATION:
2079 case PACKET3_COND_EXEC:
2080 case PACKET3_PRED_EXEC:
2081 case PACKET3_CONTEXT_CONTROL:
2082 case PACKET3_STRMOUT_BUFFER_UPDATE:
2083 case PACKET3_WAIT_REG_MEM:
2084 case PACKET3_MEM_WRITE:
2085 case PACKET3_PFP_SYNC_ME:
2086 case PACKET3_SURFACE_SYNC:
2087 case PACKET3_EVENT_WRITE:
2088 case PACKET3_EVENT_WRITE_EOP:
2089 case PACKET3_EVENT_WRITE_EOS:
2090 case PACKET3_SET_CONTEXT_REG:
2091 case PACKET3_SET_CONTEXT_REG_INDIRECT:
2092 case PACKET3_SET_SH_REG:
2093 case PACKET3_SET_SH_REG_OFFSET:
2094 case PACKET3_INCREMENT_DE_COUNTER:
2095 case PACKET3_WAIT_ON_CE_COUNTER:
2096 case PACKET3_WAIT_ON_AVAIL_BUFFER:
2097 case PACKET3_ME_WRITE:
2098 break;
2099 case PACKET3_COPY_DATA:
2100 if ((idx_value & 0xf00) == 0) {
2101 reg = ib[idx + 3] * 4;
2102 if (!si_vm_reg_valid(reg))
2103 return -EINVAL;
2104 }
2105 break;
2106 case PACKET3_WRITE_DATA:
2107 if ((idx_value & 0xf00) == 0) {
2108 start_reg = ib[idx + 1] * 4;
2109 if (idx_value & 0x10000) {
2110 if (!si_vm_reg_valid(start_reg))
2111 return -EINVAL;
2112 } else {
2113 for (i = 0; i < (pkt->count - 2); i++) {
2114 reg = start_reg + (4 * i);
2115 if (!si_vm_reg_valid(reg))
2116 return -EINVAL;
2117 }
2118 }
2119 }
2120 break;
2121 case PACKET3_COND_WRITE:
2122 if (idx_value & 0x100) {
2123 reg = ib[idx + 5] * 4;
2124 if (!si_vm_reg_valid(reg))
2125 return -EINVAL;
2126 }
2127 break;
2128 case PACKET3_COPY_DW:
2129 if (idx_value & 0x2) {
2130 reg = ib[idx + 3] * 4;
2131 if (!si_vm_reg_valid(reg))
2132 return -EINVAL;
2133 }
2134 break;
2135 default:
2136 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2137 return -EINVAL;
2138 }
2139 return 0;
2140}
2141
2142int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2143{
2144 int ret = 0;
2145 u32 idx = 0;
2146 struct radeon_cs_packet pkt;
2147
2148 do {
2149 pkt.idx = idx;
2150 pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2151 pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2152 pkt.one_reg_wr = 0;
2153 switch (pkt.type) {
2154 case PACKET_TYPE0:
2155 dev_err(rdev->dev, "Packet0 not allowed!\n");
2156 ret = -EINVAL;
2157 break;
2158 case PACKET_TYPE2:
2159 idx += 1;
2160 break;
2161 case PACKET_TYPE3:
2162 pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2163 if (ib->is_const_ib)
2164 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2165 else {
2166 switch (ib->fence->ring) {
2167 case RADEON_RING_TYPE_GFX_INDEX:
2168 ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2169 break;
2170 case CAYMAN_RING_TYPE_CP1_INDEX:
2171 case CAYMAN_RING_TYPE_CP2_INDEX:
2172 ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2173 break;
2174 default:
2175 dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
2176 ret = -EINVAL;
2177 break;
2178 }
2179 }
2180 idx += pkt.count + 2;
2181 break;
2182 default:
2183 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2184 ret = -EINVAL;
2185 break;
2186 }
2187 if (ret)
2188 break;
2189 } while (idx < ib->length_dw);
2190
2191 return ret;
2192}
2193
/*
 * vm
 */
2197int si_vm_init(struct radeon_device *rdev)
2198{
2199 /* number of VMs */
2200 rdev->vm_manager.nvm = 16;
2201 /* base offset of vram pages */
2202 rdev->vm_manager.vram_base_offset = 0;
2203
2204 return 0;
2205}
2206
/*
 * si_vm_fini - counterpart of si_vm_init().
 *
 * @rdev: radeon device
 *
 * Intentionally empty: si_vm_init() acquires nothing that needs releasing.
 */
void si_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
2210
2211int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
2212{
2213 if (id < 8)
2214 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
2215 else
2216 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
2217 vm->pt_gpu_addr >> 12);
2218 /* flush hdp cache */
2219 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2220 /* bits 0-15 are the VM contexts0-15 */
2221 WREG32(VM_INVALIDATE_REQUEST, 1 << id);
2222 return 0;
2223}
2224
2225void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
2226{
2227 if (vm->id < 8)
2228 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
2229 else
2230 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
2231 /* flush hdp cache */
2232 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2233 /* bits 0-15 are the VM contexts0-15 */
2234 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2235}
2236
2237void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
2238{
2239 if (vm->id == -1)
2240 return;
2241
2242 /* flush hdp cache */
2243 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2244 /* bits 0-15 are the VM contexts0-15 */
2245 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2246}
2247