blob: 28a7531e01576c62faf4ddede7da46a382b5ac82 [file] [log] [blame]
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
Alex Deucher6f2043c2013-04-09 12:43:41 -040030#include "radeon_asic.h"
Alex Deucher8cc1a532013-04-09 12:41:24 -040031#include "cikd.h"
32#include "atom.h"
33
Alex Deucher6f2043c2013-04-09 12:43:41 -040034extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
35extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
Alex Deucher1c491652013-04-09 12:45:26 -040036extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
Alex Deucher6f2043c2013-04-09 12:43:41 -040037
/*
 * Core functions
 */
41/**
42 * cik_tiling_mode_table_init - init the hw tiling table
43 *
44 * @rdev: radeon_device pointer
45 *
46 * Starting with SI, the tiling setup is done globally in a
47 * set of 32 tiling modes. Rather than selecting each set of
48 * parameters per surface as on older asics, we just select
49 * which index in the tiling table we want to use, and the
50 * surface uses those parameters (CIK).
51 */
52static void cik_tiling_mode_table_init(struct radeon_device *rdev)
53{
54 const u32 num_tile_mode_states = 32;
55 const u32 num_secondary_tile_mode_states = 16;
56 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
57 u32 num_pipe_configs;
58 u32 num_rbs = rdev->config.cik.max_backends_per_se *
59 rdev->config.cik.max_shader_engines;
60
61 switch (rdev->config.cik.mem_row_size_in_kb) {
62 case 1:
63 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
64 break;
65 case 2:
66 default:
67 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
68 break;
69 case 4:
70 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
71 break;
72 }
73
74 num_pipe_configs = rdev->config.cik.max_tile_pipes;
75 if (num_pipe_configs > 8)
76 num_pipe_configs = 8; /* ??? */
77
78 if (num_pipe_configs == 8) {
79 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
80 switch (reg_offset) {
81 case 0:
82 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
83 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
84 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
85 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
86 break;
87 case 1:
88 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
89 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
90 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
91 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
92 break;
93 case 2:
94 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
95 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
96 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
97 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
98 break;
99 case 3:
100 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
101 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
103 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
104 break;
105 case 4:
106 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
109 TILE_SPLIT(split_equal_to_row_size));
110 break;
111 case 5:
112 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
114 break;
115 case 6:
116 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
119 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
120 break;
121 case 7:
122 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
124 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
125 TILE_SPLIT(split_equal_to_row_size));
126 break;
127 case 8:
128 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
129 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
130 break;
131 case 9:
132 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
133 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
134 break;
135 case 10:
136 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
138 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
140 break;
141 case 11:
142 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
144 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
146 break;
147 case 12:
148 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
149 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
150 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
152 break;
153 case 13:
154 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
156 break;
157 case 14:
158 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
160 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
162 break;
163 case 16:
164 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
165 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
166 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
168 break;
169 case 17:
170 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
171 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
172 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
174 break;
175 case 27:
176 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
177 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
178 break;
179 case 28:
180 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
182 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
184 break;
185 case 29:
186 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
187 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
188 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
190 break;
191 case 30:
192 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
193 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
194 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
196 break;
197 default:
198 gb_tile_moden = 0;
199 break;
200 }
201 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
202 }
203 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
204 switch (reg_offset) {
205 case 0:
206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
209 NUM_BANKS(ADDR_SURF_16_BANK));
210 break;
211 case 1:
212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
215 NUM_BANKS(ADDR_SURF_16_BANK));
216 break;
217 case 2:
218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
221 NUM_BANKS(ADDR_SURF_16_BANK));
222 break;
223 case 3:
224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
227 NUM_BANKS(ADDR_SURF_16_BANK));
228 break;
229 case 4:
230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
233 NUM_BANKS(ADDR_SURF_8_BANK));
234 break;
235 case 5:
236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
239 NUM_BANKS(ADDR_SURF_4_BANK));
240 break;
241 case 6:
242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
245 NUM_BANKS(ADDR_SURF_2_BANK));
246 break;
247 case 8:
248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
251 NUM_BANKS(ADDR_SURF_16_BANK));
252 break;
253 case 9:
254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
257 NUM_BANKS(ADDR_SURF_16_BANK));
258 break;
259 case 10:
260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
263 NUM_BANKS(ADDR_SURF_16_BANK));
264 break;
265 case 11:
266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
269 NUM_BANKS(ADDR_SURF_16_BANK));
270 break;
271 case 12:
272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
275 NUM_BANKS(ADDR_SURF_8_BANK));
276 break;
277 case 13:
278 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
281 NUM_BANKS(ADDR_SURF_4_BANK));
282 break;
283 case 14:
284 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
287 NUM_BANKS(ADDR_SURF_2_BANK));
288 break;
289 default:
290 gb_tile_moden = 0;
291 break;
292 }
293 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
294 }
295 } else if (num_pipe_configs == 4) {
296 if (num_rbs == 4) {
297 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
298 switch (reg_offset) {
299 case 0:
300 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
302 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
304 break;
305 case 1:
306 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
308 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
310 break;
311 case 2:
312 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
314 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
316 break;
317 case 3:
318 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
319 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
320 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
321 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
322 break;
323 case 4:
324 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
326 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
327 TILE_SPLIT(split_equal_to_row_size));
328 break;
329 case 5:
330 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
332 break;
333 case 6:
334 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
336 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
338 break;
339 case 7:
340 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
342 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
343 TILE_SPLIT(split_equal_to_row_size));
344 break;
345 case 8:
346 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
347 PIPE_CONFIG(ADDR_SURF_P4_16x16));
348 break;
349 case 9:
350 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
351 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
352 break;
353 case 10:
354 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
356 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
358 break;
359 case 11:
360 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
361 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
362 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
364 break;
365 case 12:
366 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
367 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
368 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
370 break;
371 case 13:
372 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
374 break;
375 case 14:
376 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
378 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
380 break;
381 case 16:
382 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
383 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
384 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
386 break;
387 case 17:
388 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
390 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
392 break;
393 case 27:
394 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
395 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
396 break;
397 case 28:
398 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
399 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
400 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
402 break;
403 case 29:
404 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
405 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
406 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
408 break;
409 case 30:
410 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
411 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
412 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
414 break;
415 default:
416 gb_tile_moden = 0;
417 break;
418 }
419 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
420 }
421 } else if (num_rbs < 4) {
422 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
423 switch (reg_offset) {
424 case 0:
425 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
427 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
428 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
429 break;
430 case 1:
431 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
432 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
433 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
434 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
435 break;
436 case 2:
437 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
438 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
439 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
441 break;
442 case 3:
443 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
444 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
445 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
446 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
447 break;
448 case 4:
449 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
451 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
452 TILE_SPLIT(split_equal_to_row_size));
453 break;
454 case 5:
455 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
457 break;
458 case 6:
459 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
460 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
461 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
462 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
463 break;
464 case 7:
465 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
466 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
467 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
468 TILE_SPLIT(split_equal_to_row_size));
469 break;
470 case 8:
471 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
472 PIPE_CONFIG(ADDR_SURF_P4_8x16));
473 break;
474 case 9:
475 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
476 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
477 break;
478 case 10:
479 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
480 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
481 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
483 break;
484 case 11:
485 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
487 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
489 break;
490 case 12:
491 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
492 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
493 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
495 break;
496 case 13:
497 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
499 break;
500 case 14:
501 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
503 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
505 break;
506 case 16:
507 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
508 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
509 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
511 break;
512 case 17:
513 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
514 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
515 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
517 break;
518 case 27:
519 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
520 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
521 break;
522 case 28:
523 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
524 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
525 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
526 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
527 break;
528 case 29:
529 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
530 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
531 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
533 break;
534 case 30:
535 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
536 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
537 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
538 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
539 break;
540 default:
541 gb_tile_moden = 0;
542 break;
543 }
544 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
545 }
546 }
547 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
548 switch (reg_offset) {
549 case 0:
550 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
553 NUM_BANKS(ADDR_SURF_16_BANK));
554 break;
555 case 1:
556 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
559 NUM_BANKS(ADDR_SURF_16_BANK));
560 break;
561 case 2:
562 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
565 NUM_BANKS(ADDR_SURF_16_BANK));
566 break;
567 case 3:
568 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
571 NUM_BANKS(ADDR_SURF_16_BANK));
572 break;
573 case 4:
574 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
577 NUM_BANKS(ADDR_SURF_16_BANK));
578 break;
579 case 5:
580 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
581 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
582 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
583 NUM_BANKS(ADDR_SURF_8_BANK));
584 break;
585 case 6:
586 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
587 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
588 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
589 NUM_BANKS(ADDR_SURF_4_BANK));
590 break;
591 case 8:
592 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
593 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
594 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
595 NUM_BANKS(ADDR_SURF_16_BANK));
596 break;
597 case 9:
598 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
599 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
600 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
601 NUM_BANKS(ADDR_SURF_16_BANK));
602 break;
603 case 10:
604 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
605 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
606 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
607 NUM_BANKS(ADDR_SURF_16_BANK));
608 break;
609 case 11:
610 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
611 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
612 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
613 NUM_BANKS(ADDR_SURF_16_BANK));
614 break;
615 case 12:
616 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
617 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
618 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
619 NUM_BANKS(ADDR_SURF_16_BANK));
620 break;
621 case 13:
622 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
623 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
624 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
625 NUM_BANKS(ADDR_SURF_8_BANK));
626 break;
627 case 14:
628 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
629 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
630 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
631 NUM_BANKS(ADDR_SURF_4_BANK));
632 break;
633 default:
634 gb_tile_moden = 0;
635 break;
636 }
637 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
638 }
639 } else if (num_pipe_configs == 2) {
640 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
641 switch (reg_offset) {
642 case 0:
643 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
644 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
645 PIPE_CONFIG(ADDR_SURF_P2) |
646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
647 break;
648 case 1:
649 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
651 PIPE_CONFIG(ADDR_SURF_P2) |
652 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
653 break;
654 case 2:
655 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
656 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
657 PIPE_CONFIG(ADDR_SURF_P2) |
658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
659 break;
660 case 3:
661 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
662 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
663 PIPE_CONFIG(ADDR_SURF_P2) |
664 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
665 break;
666 case 4:
667 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
668 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
669 PIPE_CONFIG(ADDR_SURF_P2) |
670 TILE_SPLIT(split_equal_to_row_size));
671 break;
672 case 5:
673 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
674 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
675 break;
676 case 6:
677 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
678 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
679 PIPE_CONFIG(ADDR_SURF_P2) |
680 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
681 break;
682 case 7:
683 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
685 PIPE_CONFIG(ADDR_SURF_P2) |
686 TILE_SPLIT(split_equal_to_row_size));
687 break;
688 case 8:
689 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
690 break;
691 case 9:
692 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
693 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
694 break;
695 case 10:
696 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
697 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
698 PIPE_CONFIG(ADDR_SURF_P2) |
699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
700 break;
701 case 11:
702 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
703 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
704 PIPE_CONFIG(ADDR_SURF_P2) |
705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
706 break;
707 case 12:
708 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
709 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
710 PIPE_CONFIG(ADDR_SURF_P2) |
711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
712 break;
713 case 13:
714 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
715 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
716 break;
717 case 14:
718 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
719 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
720 PIPE_CONFIG(ADDR_SURF_P2) |
721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
722 break;
723 case 16:
724 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
725 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
726 PIPE_CONFIG(ADDR_SURF_P2) |
727 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
728 break;
729 case 17:
730 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
732 PIPE_CONFIG(ADDR_SURF_P2) |
733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
734 break;
735 case 27:
736 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
737 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
738 break;
739 case 28:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P2) |
743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
744 break;
745 case 29:
746 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
747 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
748 PIPE_CONFIG(ADDR_SURF_P2) |
749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
750 break;
751 case 30:
752 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
753 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
754 PIPE_CONFIG(ADDR_SURF_P2) |
755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
756 break;
757 default:
758 gb_tile_moden = 0;
759 break;
760 }
761 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
762 }
763 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
764 switch (reg_offset) {
765 case 0:
766 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
769 NUM_BANKS(ADDR_SURF_16_BANK));
770 break;
771 case 1:
772 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
775 NUM_BANKS(ADDR_SURF_16_BANK));
776 break;
777 case 2:
778 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
781 NUM_BANKS(ADDR_SURF_16_BANK));
782 break;
783 case 3:
784 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
787 NUM_BANKS(ADDR_SURF_16_BANK));
788 break;
789 case 4:
790 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
793 NUM_BANKS(ADDR_SURF_16_BANK));
794 break;
795 case 5:
796 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
799 NUM_BANKS(ADDR_SURF_16_BANK));
800 break;
801 case 6:
802 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
805 NUM_BANKS(ADDR_SURF_8_BANK));
806 break;
807 case 8:
808 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
811 NUM_BANKS(ADDR_SURF_16_BANK));
812 break;
813 case 9:
814 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
817 NUM_BANKS(ADDR_SURF_16_BANK));
818 break;
819 case 10:
820 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
823 NUM_BANKS(ADDR_SURF_16_BANK));
824 break;
825 case 11:
826 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
829 NUM_BANKS(ADDR_SURF_16_BANK));
830 break;
831 case 12:
832 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
835 NUM_BANKS(ADDR_SURF_16_BANK));
836 break;
837 case 13:
838 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
839 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
840 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
841 NUM_BANKS(ADDR_SURF_16_BANK));
842 break;
843 case 14:
844 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
847 NUM_BANKS(ADDR_SURF_8_BANK));
848 break;
849 default:
850 gb_tile_moden = 0;
851 break;
852 }
853 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
854 }
855 } else
856 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
857}
858
859/**
860 * cik_select_se_sh - select which SE, SH to address
861 *
862 * @rdev: radeon_device pointer
863 * @se_num: shader engine to address
864 * @sh_num: sh block to address
865 *
866 * Select which SE, SH combinations to address. Certain
867 * registers are instanced per SE or SH. 0xffffffff means
868 * broadcast to all SEs or SHs (CIK).
869 */
870static void cik_select_se_sh(struct radeon_device *rdev,
871 u32 se_num, u32 sh_num)
872{
873 u32 data = INSTANCE_BROADCAST_WRITES;
874
875 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
876 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
877 else if (se_num == 0xffffffff)
878 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
879 else if (sh_num == 0xffffffff)
880 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
881 else
882 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
883 WREG32(GRBM_GFX_INDEX, data);
884}
885
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Build a mask with the @bit_width least significant bits set (CIK).
 * A width of 32 or more yields an all-ones mask.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting by the full type width is undefined, so saturate first. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
904
905/**
906 * cik_select_se_sh - select which SE, SH to address
907 *
908 * @rdev: radeon_device pointer
909 * @max_rb_num: max RBs (render backends) for the asic
910 * @se_num: number of SEs (shader engines) for the asic
911 * @sh_per_se: number of SH blocks per SE for the asic
912 *
913 * Calculates the bitmask of disabled RBs (CIK).
914 * Returns the disabled RB bitmask.
915 */
916static u32 cik_get_rb_disabled(struct radeon_device *rdev,
917 u32 max_rb_num, u32 se_num,
918 u32 sh_per_se)
919{
920 u32 data, mask;
921
922 data = RREG32(CC_RB_BACKEND_DISABLE);
923 if (data & 1)
924 data &= BACKEND_DISABLE_MASK;
925 else
926 data = 0;
927 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
928
929 data >>= BACKEND_DISABLE_SHIFT;
930
931 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
932
933 return data & mask;
934}
935
936/**
937 * cik_setup_rb - setup the RBs on the asic
938 *
939 * @rdev: radeon_device pointer
940 * @se_num: number of SEs (shader engines) for the asic
941 * @sh_per_se: number of SH blocks per SE for the asic
942 * @max_rb_num: max RBs (render backends) for the asic
943 *
944 * Configures per-SE/SH RB registers (CIK).
945 */
946static void cik_setup_rb(struct radeon_device *rdev,
947 u32 se_num, u32 sh_per_se,
948 u32 max_rb_num)
949{
950 int i, j;
951 u32 data, mask;
952 u32 disabled_rbs = 0;
953 u32 enabled_rbs = 0;
954
955 for (i = 0; i < se_num; i++) {
956 for (j = 0; j < sh_per_se; j++) {
957 cik_select_se_sh(rdev, i, j);
958 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
959 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
960 }
961 }
962 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
963
964 mask = 1;
965 for (i = 0; i < max_rb_num; i++) {
966 if (!(disabled_rbs & mask))
967 enabled_rbs |= mask;
968 mask <<= 1;
969 }
970
971 for (i = 0; i < se_num; i++) {
972 cik_select_se_sh(rdev, i, 0xffffffff);
973 data = 0;
974 for (j = 0; j < sh_per_se; j++) {
975 switch (enabled_rbs & 3) {
976 case 1:
977 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
978 break;
979 case 2:
980 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
981 break;
982 case 3:
983 default:
984 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
985 break;
986 }
987 enabled_rbs >>= 2;
988 }
989 WREG32(PA_SC_RASTER_CONFIG, data);
990 }
991 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
992}
993
994/**
995 * cik_gpu_init - setup the 3D engine
996 *
997 * @rdev: radeon_device pointer
998 *
999 * Configures the 3D engine and tiling configuration
1000 * registers so that the 3D engine is usable.
1001 */
1002static void cik_gpu_init(struct radeon_device *rdev)
1003{
1004 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1005 u32 mc_shared_chmap, mc_arb_ramcfg;
1006 u32 hdp_host_path_cntl;
1007 u32 tmp;
1008 int i, j;
1009
1010 switch (rdev->family) {
1011 case CHIP_BONAIRE:
1012 rdev->config.cik.max_shader_engines = 2;
1013 rdev->config.cik.max_tile_pipes = 4;
1014 rdev->config.cik.max_cu_per_sh = 7;
1015 rdev->config.cik.max_sh_per_se = 1;
1016 rdev->config.cik.max_backends_per_se = 2;
1017 rdev->config.cik.max_texture_channel_caches = 4;
1018 rdev->config.cik.max_gprs = 256;
1019 rdev->config.cik.max_gs_threads = 32;
1020 rdev->config.cik.max_hw_contexts = 8;
1021
1022 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1023 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1024 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1025 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1026 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1027 break;
1028 case CHIP_KAVERI:
1029 /* TODO */
1030 break;
1031 case CHIP_KABINI:
1032 default:
1033 rdev->config.cik.max_shader_engines = 1;
1034 rdev->config.cik.max_tile_pipes = 2;
1035 rdev->config.cik.max_cu_per_sh = 2;
1036 rdev->config.cik.max_sh_per_se = 1;
1037 rdev->config.cik.max_backends_per_se = 1;
1038 rdev->config.cik.max_texture_channel_caches = 2;
1039 rdev->config.cik.max_gprs = 256;
1040 rdev->config.cik.max_gs_threads = 16;
1041 rdev->config.cik.max_hw_contexts = 8;
1042
1043 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1044 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1045 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1046 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1047 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1048 break;
1049 }
1050
1051 /* Initialize HDP */
1052 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1053 WREG32((0x2c14 + j), 0x00000000);
1054 WREG32((0x2c18 + j), 0x00000000);
1055 WREG32((0x2c1c + j), 0x00000000);
1056 WREG32((0x2c20 + j), 0x00000000);
1057 WREG32((0x2c24 + j), 0x00000000);
1058 }
1059
1060 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1061
1062 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1063
1064 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1065 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1066
1067 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1068 rdev->config.cik.mem_max_burst_length_bytes = 256;
1069 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1070 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1071 if (rdev->config.cik.mem_row_size_in_kb > 4)
1072 rdev->config.cik.mem_row_size_in_kb = 4;
1073 /* XXX use MC settings? */
1074 rdev->config.cik.shader_engine_tile_size = 32;
1075 rdev->config.cik.num_gpus = 1;
1076 rdev->config.cik.multi_gpu_tile_size = 64;
1077
1078 /* fix up row size */
1079 gb_addr_config &= ~ROW_SIZE_MASK;
1080 switch (rdev->config.cik.mem_row_size_in_kb) {
1081 case 1:
1082 default:
1083 gb_addr_config |= ROW_SIZE(0);
1084 break;
1085 case 2:
1086 gb_addr_config |= ROW_SIZE(1);
1087 break;
1088 case 4:
1089 gb_addr_config |= ROW_SIZE(2);
1090 break;
1091 }
1092
1093 /* setup tiling info dword. gb_addr_config is not adequate since it does
1094 * not have bank info, so create a custom tiling dword.
1095 * bits 3:0 num_pipes
1096 * bits 7:4 num_banks
1097 * bits 11:8 group_size
1098 * bits 15:12 row_size
1099 */
1100 rdev->config.cik.tile_config = 0;
1101 switch (rdev->config.cik.num_tile_pipes) {
1102 case 1:
1103 rdev->config.cik.tile_config |= (0 << 0);
1104 break;
1105 case 2:
1106 rdev->config.cik.tile_config |= (1 << 0);
1107 break;
1108 case 4:
1109 rdev->config.cik.tile_config |= (2 << 0);
1110 break;
1111 case 8:
1112 default:
1113 /* XXX what about 12? */
1114 rdev->config.cik.tile_config |= (3 << 0);
1115 break;
1116 }
1117 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1118 rdev->config.cik.tile_config |= 1 << 4;
1119 else
1120 rdev->config.cik.tile_config |= 0 << 4;
1121 rdev->config.cik.tile_config |=
1122 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1123 rdev->config.cik.tile_config |=
1124 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1125
1126 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1127 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1128 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1129
1130 cik_tiling_mode_table_init(rdev);
1131
1132 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1133 rdev->config.cik.max_sh_per_se,
1134 rdev->config.cik.max_backends_per_se);
1135
1136 /* set HW defaults for 3D engine */
1137 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1138
1139 WREG32(SX_DEBUG_1, 0x20);
1140
1141 WREG32(TA_CNTL_AUX, 0x00010000);
1142
1143 tmp = RREG32(SPI_CONFIG_CNTL);
1144 tmp |= 0x03000000;
1145 WREG32(SPI_CONFIG_CNTL, tmp);
1146
1147 WREG32(SQ_CONFIG, 1);
1148
1149 WREG32(DB_DEBUG, 0);
1150
1151 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1152 tmp |= 0x00000400;
1153 WREG32(DB_DEBUG2, tmp);
1154
1155 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1156 tmp |= 0x00020200;
1157 WREG32(DB_DEBUG3, tmp);
1158
1159 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1160 tmp |= 0x00018208;
1161 WREG32(CB_HW_CONTROL, tmp);
1162
1163 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1164
1165 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1166 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1167 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1168 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1169
1170 WREG32(VGT_NUM_INSTANCES, 1);
1171
1172 WREG32(CP_PERFMON_CNTL, 0);
1173
1174 WREG32(SQ_CONFIG, 0);
1175
1176 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1177 FORCE_EOV_MAX_REZ_CNT(255)));
1178
1179 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1180 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1181
1182 WREG32(VGT_GS_VERTEX_REUSE, 16);
1183 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1184
1185 tmp = RREG32(HDP_MISC_CNTL);
1186 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1187 WREG32(HDP_MISC_CNTL, tmp);
1188
1189 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1190 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1191
1192 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1193 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1194
1195 udelay(50);
1196}
1197
Alex Deucher6f2043c2013-04-09 12:43:41 -04001198/**
1199 * cik_gpu_is_lockup - check if the 3D engine is locked up
1200 *
1201 * @rdev: radeon_device pointer
1202 * @ring: radeon_ring structure holding ring information
1203 *
1204 * Check if the 3D engine is locked up (CIK).
1205 * Returns true if the engine is locked, false if not.
1206 */
1207bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1208{
1209 u32 srbm_status, srbm_status2;
1210 u32 grbm_status, grbm_status2;
1211 u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
1212
1213 srbm_status = RREG32(SRBM_STATUS);
1214 srbm_status2 = RREG32(SRBM_STATUS2);
1215 grbm_status = RREG32(GRBM_STATUS);
1216 grbm_status2 = RREG32(GRBM_STATUS2);
1217 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
1218 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
1219 grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
1220 grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
1221 if (!(grbm_status & GUI_ACTIVE)) {
1222 radeon_ring_lockup_update(ring);
1223 return false;
1224 }
1225 /* force CP activities */
1226 radeon_ring_force_activity(rdev, ring);
1227 return radeon_ring_test_lockup(rdev, ring);
1228}
1229
1230/**
1231 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
1232 *
1233 * @rdev: radeon_device pointer
1234 *
1235 * Soft reset the GFX engine and CPG blocks (CIK).
1236 * XXX: deal with reseting RLC and CPF
1237 * Returns 0 for success.
1238 */
1239static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
1240{
1241 struct evergreen_mc_save save;
1242 u32 grbm_reset = 0;
1243
1244 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
1245 return 0;
1246
1247 dev_info(rdev->dev, "GPU GFX softreset \n");
1248 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1249 RREG32(GRBM_STATUS));
1250 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1251 RREG32(GRBM_STATUS2));
1252 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1253 RREG32(GRBM_STATUS_SE0));
1254 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1255 RREG32(GRBM_STATUS_SE1));
1256 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1257 RREG32(GRBM_STATUS_SE2));
1258 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1259 RREG32(GRBM_STATUS_SE3));
1260 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1261 RREG32(SRBM_STATUS));
1262 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1263 RREG32(SRBM_STATUS2));
1264 evergreen_mc_stop(rdev, &save);
1265 if (radeon_mc_wait_for_idle(rdev)) {
1266 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1267 }
1268 /* Disable CP parsing/prefetching */
1269 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
1270
1271 /* reset all the gfx block and all CPG blocks */
1272 grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
1273
1274 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
1275 WREG32(GRBM_SOFT_RESET, grbm_reset);
1276 (void)RREG32(GRBM_SOFT_RESET);
1277 udelay(50);
1278 WREG32(GRBM_SOFT_RESET, 0);
1279 (void)RREG32(GRBM_SOFT_RESET);
1280 /* Wait a little for things to settle down */
1281 udelay(50);
1282 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1283 RREG32(GRBM_STATUS));
1284 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1285 RREG32(GRBM_STATUS2));
1286 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1287 RREG32(GRBM_STATUS_SE0));
1288 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1289 RREG32(GRBM_STATUS_SE1));
1290 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1291 RREG32(GRBM_STATUS_SE2));
1292 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1293 RREG32(GRBM_STATUS_SE3));
1294 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1295 RREG32(SRBM_STATUS));
1296 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1297 RREG32(SRBM_STATUS2));
1298 evergreen_mc_resume(rdev, &save);
1299 return 0;
1300}
1301
1302/**
1303 * cik_compute_gpu_soft_reset - soft reset CPC
1304 *
1305 * @rdev: radeon_device pointer
1306 *
1307 * Soft reset the CPC blocks (CIK).
1308 * XXX: deal with reseting RLC and CPF
1309 * Returns 0 for success.
1310 */
1311static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
1312{
1313 struct evergreen_mc_save save;
1314 u32 grbm_reset = 0;
1315
1316 dev_info(rdev->dev, "GPU compute softreset \n");
1317 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1318 RREG32(GRBM_STATUS));
1319 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1320 RREG32(GRBM_STATUS2));
1321 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1322 RREG32(GRBM_STATUS_SE0));
1323 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1324 RREG32(GRBM_STATUS_SE1));
1325 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1326 RREG32(GRBM_STATUS_SE2));
1327 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1328 RREG32(GRBM_STATUS_SE3));
1329 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1330 RREG32(SRBM_STATUS));
1331 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1332 RREG32(SRBM_STATUS2));
1333 evergreen_mc_stop(rdev, &save);
1334 if (radeon_mc_wait_for_idle(rdev)) {
1335 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1336 }
1337 /* Disable CP parsing/prefetching */
1338 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
1339
1340 /* reset all the CPC blocks */
1341 grbm_reset = SOFT_RESET_CPG;
1342
1343 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
1344 WREG32(GRBM_SOFT_RESET, grbm_reset);
1345 (void)RREG32(GRBM_SOFT_RESET);
1346 udelay(50);
1347 WREG32(GRBM_SOFT_RESET, 0);
1348 (void)RREG32(GRBM_SOFT_RESET);
1349 /* Wait a little for things to settle down */
1350 udelay(50);
1351 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1352 RREG32(GRBM_STATUS));
1353 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
1354 RREG32(GRBM_STATUS2));
1355 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1356 RREG32(GRBM_STATUS_SE0));
1357 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1358 RREG32(GRBM_STATUS_SE1));
1359 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1360 RREG32(GRBM_STATUS_SE2));
1361 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1362 RREG32(GRBM_STATUS_SE3));
1363 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1364 RREG32(SRBM_STATUS));
1365 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
1366 RREG32(SRBM_STATUS2));
1367 evergreen_mc_resume(rdev, &save);
1368 return 0;
1369}
1370
1371/**
1372 * cik_asic_reset - soft reset compute and gfx
1373 *
1374 * @rdev: radeon_device pointer
1375 *
1376 * Soft reset the CPC blocks (CIK).
1377 * XXX: make this more fine grained and only reset
1378 * what is necessary.
1379 * Returns 0 for success.
1380 */
1381int cik_asic_reset(struct radeon_device *rdev)
1382{
1383 int r;
1384
1385 r = cik_compute_gpu_soft_reset(rdev);
1386 if (r)
1387 dev_info(rdev->dev, "Compute reset failed!\n");
1388
1389 return cik_gfx_gpu_soft_reset(rdev);
1390}
Alex Deucher1c491652013-04-09 12:45:26 -04001391
1392/* MC */
1393/**
1394 * cik_mc_program - program the GPU memory controller
1395 *
1396 * @rdev: radeon_device pointer
1397 *
1398 * Set the location of vram, gart, and AGP in the GPU's
1399 * physical address space (CIK).
1400 */
1401static void cik_mc_program(struct radeon_device *rdev)
1402{
1403 struct evergreen_mc_save save;
1404 u32 tmp;
1405 int i, j;
1406
1407 /* Initialize HDP */
1408 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1409 WREG32((0x2c14 + j), 0x00000000);
1410 WREG32((0x2c18 + j), 0x00000000);
1411 WREG32((0x2c1c + j), 0x00000000);
1412 WREG32((0x2c20 + j), 0x00000000);
1413 WREG32((0x2c24 + j), 0x00000000);
1414 }
1415 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
1416
1417 evergreen_mc_stop(rdev, &save);
1418 if (radeon_mc_wait_for_idle(rdev)) {
1419 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1420 }
1421 /* Lockout access through VGA aperture*/
1422 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
1423 /* Update configuration */
1424 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
1425 rdev->mc.vram_start >> 12);
1426 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
1427 rdev->mc.vram_end >> 12);
1428 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
1429 rdev->vram_scratch.gpu_addr >> 12);
1430 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
1431 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
1432 WREG32(MC_VM_FB_LOCATION, tmp);
1433 /* XXX double check these! */
1434 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
1435 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
1436 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
1437 WREG32(MC_VM_AGP_BASE, 0);
1438 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
1439 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
1440 if (radeon_mc_wait_for_idle(rdev)) {
1441 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
1442 }
1443 evergreen_mc_resume(rdev, &save);
1444 /* we need to own VRAM, so turn off the VGA renderer here
1445 * to stop it overwriting our objects */
1446 rv515_vga_render_disable(rdev);
1447}
1448
1449/**
1450 * cik_mc_init - initialize the memory controller driver params
1451 *
1452 * @rdev: radeon_device pointer
1453 *
1454 * Look up the amount of vram, vram width, and decide how to place
1455 * vram and gart within the GPU's physical address space (CIK).
1456 * Returns 0 for success.
1457 */
1458static int cik_mc_init(struct radeon_device *rdev)
1459{
1460 u32 tmp;
1461 int chansize, numchan;
1462
1463 /* Get VRAM informations */
1464 rdev->mc.vram_is_ddr = true;
1465 tmp = RREG32(MC_ARB_RAMCFG);
1466 if (tmp & CHANSIZE_MASK) {
1467 chansize = 64;
1468 } else {
1469 chansize = 32;
1470 }
1471 tmp = RREG32(MC_SHARED_CHMAP);
1472 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1473 case 0:
1474 default:
1475 numchan = 1;
1476 break;
1477 case 1:
1478 numchan = 2;
1479 break;
1480 case 2:
1481 numchan = 4;
1482 break;
1483 case 3:
1484 numchan = 8;
1485 break;
1486 case 4:
1487 numchan = 3;
1488 break;
1489 case 5:
1490 numchan = 6;
1491 break;
1492 case 6:
1493 numchan = 10;
1494 break;
1495 case 7:
1496 numchan = 12;
1497 break;
1498 case 8:
1499 numchan = 16;
1500 break;
1501 }
1502 rdev->mc.vram_width = numchan * chansize;
1503 /* Could aper size report 0 ? */
1504 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
1505 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
1506 /* size in MB on si */
1507 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1508 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
1509 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1510 si_vram_gtt_location(rdev, &rdev->mc);
1511 radeon_update_bandwidth_info(rdev);
1512
1513 return 0;
1514}
1515
1516/*
1517 * GART
1518 * VMID 0 is the physical GPU addresses as used by the kernel.
1519 * VMIDs 1-15 are used for userspace clients and are handled
1520 * by the radeon vm/hsa code.
1521 */
1522/**
1523 * cik_pcie_gart_tlb_flush - gart tlb flush callback
1524 *
1525 * @rdev: radeon_device pointer
1526 *
1527 * Flush the TLB for the VMID 0 page table (CIK).
1528 */
1529void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
1530{
1531 /* flush hdp cache */
1532 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
1533
1534 /* bits 0-15 are the VM contexts0-15 */
1535 WREG32(VM_INVALIDATE_REQUEST, 0x1);
1536}
1537
1538/**
1539 * cik_pcie_gart_enable - gart enable
1540 *
1541 * @rdev: radeon_device pointer
1542 *
1543 * This sets up the TLBs, programs the page tables for VMID0,
1544 * sets up the hw for VMIDs 1-15 which are allocated on
1545 * demand, and sets up the global locations for the LDS, GDS,
1546 * and GPUVM for FSA64 clients (CIK).
1547 * Returns 0 for success, errors for failure.
1548 */
1549static int cik_pcie_gart_enable(struct radeon_device *rdev)
1550{
1551 int r, i;
1552
1553 if (rdev->gart.robj == NULL) {
1554 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
1555 return -EINVAL;
1556 }
1557 r = radeon_gart_table_vram_pin(rdev);
1558 if (r)
1559 return r;
1560 radeon_gart_restore(rdev);
1561 /* Setup TLB control */
1562 WREG32(MC_VM_MX_L1_TLB_CNTL,
1563 (0xA << 7) |
1564 ENABLE_L1_TLB |
1565 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1566 ENABLE_ADVANCED_DRIVER_MODEL |
1567 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1568 /* Setup L2 cache */
1569 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
1570 ENABLE_L2_FRAGMENT_PROCESSING |
1571 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1572 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1573 EFFECTIVE_L2_QUEUE_SIZE(7) |
1574 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1575 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
1576 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1577 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1578 /* setup context0 */
1579 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
1580 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
1581 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
1582 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
1583 (u32)(rdev->dummy_page.addr >> 12));
1584 WREG32(VM_CONTEXT0_CNTL2, 0);
1585 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
1586 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
1587
1588 WREG32(0x15D4, 0);
1589 WREG32(0x15D8, 0);
1590 WREG32(0x15DC, 0);
1591
1592 /* empty context1-15 */
1593 /* FIXME start with 4G, once using 2 level pt switch to full
1594 * vm size space
1595 */
1596 /* set vm size, must be a multiple of 4 */
1597 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
1598 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
1599 for (i = 1; i < 16; i++) {
1600 if (i < 8)
1601 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
1602 rdev->gart.table_addr >> 12);
1603 else
1604 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
1605 rdev->gart.table_addr >> 12);
1606 }
1607
1608 /* enable context1-15 */
1609 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
1610 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04001611 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04001612 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04001613 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1614 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1615 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1616 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
1617 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
1618 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
1619 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
1620 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
1621 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
1622 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
1623 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
1624 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04001625
1626 /* TC cache setup ??? */
1627 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
1628 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
1629 WREG32(TC_CFG_L1_STORE_POLICY, 0);
1630
1631 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
1632 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
1633 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
1634 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
1635 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
1636
1637 WREG32(TC_CFG_L1_VOLATILE, 0);
1638 WREG32(TC_CFG_L2_VOLATILE, 0);
1639
1640 if (rdev->family == CHIP_KAVERI) {
1641 u32 tmp = RREG32(CHUB_CONTROL);
1642 tmp &= ~BYPASS_VM;
1643 WREG32(CHUB_CONTROL, tmp);
1644 }
1645
1646 /* XXX SH_MEM regs */
1647 /* where to put LDS, scratch, GPUVM in FSA64 space */
1648 for (i = 0; i < 16; i++) {
1649 WREG32(SRBM_GFX_CNTL, VMID(i));
1650 WREG32(SH_MEM_CONFIG, 0);
1651 WREG32(SH_MEM_APE1_BASE, 1);
1652 WREG32(SH_MEM_APE1_LIMIT, 0);
1653 WREG32(SH_MEM_BASES, 0);
1654 }
1655 WREG32(SRBM_GFX_CNTL, 0);
1656
1657 cik_pcie_gart_tlb_flush(rdev);
1658 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1659 (unsigned)(rdev->mc.gtt_size >> 20),
1660 (unsigned long long)rdev->gart.table_addr);
1661 rdev->gart.ready = true;
1662 return 0;
1663}
1664
1665/**
1666 * cik_pcie_gart_disable - gart disable
1667 *
1668 * @rdev: radeon_device pointer
1669 *
1670 * This disables all VM page table (CIK).
1671 */
1672static void cik_pcie_gart_disable(struct radeon_device *rdev)
1673{
1674 /* Disable all tables */
1675 WREG32(VM_CONTEXT0_CNTL, 0);
1676 WREG32(VM_CONTEXT1_CNTL, 0);
1677 /* Setup TLB control */
1678 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
1679 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
1680 /* Setup L2 cache */
1681 WREG32(VM_L2_CNTL,
1682 ENABLE_L2_FRAGMENT_PROCESSING |
1683 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
1684 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
1685 EFFECTIVE_L2_QUEUE_SIZE(7) |
1686 CONTEXT1_IDENTITY_ACCESS_MODE(1));
1687 WREG32(VM_L2_CNTL2, 0);
1688 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
1689 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
1690 radeon_gart_table_vram_unpin(rdev);
1691}
1692
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the GART,
 * frees the VRAM page table and finalizes the common GART state.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hw must stop using the table before it is freed */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1706
1707/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
1720
1721/*
1722 * vm
1723 * VMID 0 is the physical GPU addresses as used by the kernel.
1724 * VMIDs 1-15 are used for userspace clients and are handled
1725 * by the radeon vm/hsa code.
1726 */
1727/**
1728 * cik_vm_init - cik vm init callback
1729 *
1730 * @rdev: radeon_device pointer
1731 *
1732 * Inits cik specific vm parameters (number of VMs, base of vram for
1733 * VMIDs 1-15) (CIK).
1734 * Returns 0 for success.
1735 */
1736int cik_vm_init(struct radeon_device *rdev)
1737{
1738 /* number of VMs */
1739 rdev->vm_manager.nvm = 16;
1740 /* base offset of vram pages */
1741 if (rdev->flags & RADEON_IS_IGP) {
1742 u64 tmp = RREG32(MC_VM_FB_OFFSET);
1743 tmp <<= 22;
1744 rdev->vm_manager.vram_base_offset = tmp;
1745 } else
1746 rdev->vm_manager.vram_base_offset = 0;
1747
1748 return 0;
1749}
1750
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently there is nothing to tear down.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
1761