/*
 * Copyright 2009 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Alex Deucher <alexander.deucher@amd.com>
 */
26#include "drmP.h"
27#include "drm.h"
28#include "radeon_drm.h"
29#include "radeon_drv.h"
30
31#include "r600_blit_shaders.h"
32
/* Values emitted by draw_auto() */
#define DI_PT_RECTLIST		0x11	/* written to VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT	0x0	/* payload of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX	0x2	/* last dword of DRAW_INDEX_AUTO */

/* Texture formats passed as "format" to set_tex_resource() */
#define FMT_8			0x1
#define FMT_5_6_5		0x8
#define FMT_8_8_8_8		0x1a

/* Colour-buffer formats passed as "format" to set_render_target() */
#define COLOR_8			0x1
#define COLOR_5_6_5		0x8
#define COLOR_8_8_8_8		0x1a
43
44static inline void
45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
46{
47 u32 cb_color_info;
48 int pitch, slice;
49 RING_LOCALS;
50 DRM_DEBUG("\n");
51
52 h = (h + 7) & ~7;
53 if (h < 8)
54 h = 8;
55
56 cb_color_info = ((format << 2) | (1 << 27));
57 pitch = (w / 8) - 1;
58 slice = ((w * h) / 64) - 1;
59
60 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
61 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
62 BEGIN_RING(21 + 2);
63 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
64 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
65 OUT_RING(gpu_addr >> 8);
66 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
67 OUT_RING(2 << 0);
68 } else {
69 BEGIN_RING(21);
70 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
71 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
72 OUT_RING(gpu_addr >> 8);
73 }
74
75 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
76 OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
77 OUT_RING((pitch << 0) | (slice << 10));
78
79 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
80 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
81 OUT_RING(0);
82
83 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
84 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
85 OUT_RING(cb_color_info);
86
87 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
88 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
89 OUT_RING(0);
90
91 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
92 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
93 OUT_RING(0);
94
95 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
96 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
97 OUT_RING(0);
98
99 ADVANCE_RING();
100}
101
102static inline void
103cp_set_surface_sync(drm_radeon_private_t *dev_priv,
104 u32 sync_type, u32 size, u64 mc_addr)
105{
106 u32 cp_coher_size;
107 RING_LOCALS;
108 DRM_DEBUG("\n");
109
110 if (size == 0xffffffff)
111 cp_coher_size = 0xffffffff;
112 else
113 cp_coher_size = ((size + 255) >> 8);
114
115 BEGIN_RING(5);
116 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
117 OUT_RING(sync_type);
118 OUT_RING(cp_coher_size);
119 OUT_RING((mc_addr >> 8));
120 OUT_RING(10); /* poll interval */
121 ADVANCE_RING();
122}
123
124static inline void
125set_shaders(struct drm_device *dev)
126{
127 drm_radeon_private_t *dev_priv = dev->dev_private;
128 u64 gpu_addr;
129 int shader_size, i;
130 u32 *vs, *ps;
131 uint32_t sq_pgm_resources;
132 RING_LOCALS;
133 DRM_DEBUG("\n");
134
135 /* load shaders */
136 vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
137 ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
138
139 shader_size = r6xx_vs_size;
140 for (i = 0; i < shader_size; i++)
141 vs[i] = r6xx_vs[i];
142 shader_size = r6xx_ps_size;
143 for (i = 0; i < shader_size; i++)
144 ps[i] = r6xx_ps[i];
145
146 dev_priv->blit_vb->used = 512;
147
148 gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
149
150 /* setup shader regs */
151 sq_pgm_resources = (1 << 0);
152
153 BEGIN_RING(9 + 12);
154 /* VS */
155 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
156 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
157 OUT_RING(gpu_addr >> 8);
158
159 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
160 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
161 OUT_RING(sq_pgm_resources);
162
163 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
164 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
165 OUT_RING(0);
166
167 /* PS */
168 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
169 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
170 OUT_RING((gpu_addr + 256) >> 8);
171
172 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
173 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
174 OUT_RING(sq_pgm_resources | (1 << 28));
175
176 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
177 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
178 OUT_RING(2);
179
180 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
181 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
182 OUT_RING(0);
183 ADVANCE_RING();
184
185 cp_set_surface_sync(dev_priv,
186 R600_SH_ACTION_ENA, 512, gpu_addr);
187}
188
189static inline void
190set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
191{
192 uint32_t sq_vtx_constant_word2;
193 RING_LOCALS;
194 DRM_DEBUG("\n");
195
196 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
197
198 BEGIN_RING(9);
199 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
200 OUT_RING(0x460);
201 OUT_RING(gpu_addr & 0xffffffff);
202 OUT_RING(48 - 1);
203 OUT_RING(sq_vtx_constant_word2);
204 OUT_RING(1 << 0);
205 OUT_RING(0);
206 OUT_RING(0);
207 OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
208 ADVANCE_RING();
209
210 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
211 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
212 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
213 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
214 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
215 cp_set_surface_sync(dev_priv,
216 R600_TC_ACTION_ENA, 48, gpu_addr);
217 else
218 cp_set_surface_sync(dev_priv,
219 R600_VC_ACTION_ENA, 48, gpu_addr);
220}
221
222static inline void
223set_tex_resource(drm_radeon_private_t *dev_priv,
224 int format, int w, int h, int pitch, u64 gpu_addr)
225{
226 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
227 RING_LOCALS;
228 DRM_DEBUG("\n");
229
230 if (h < 1)
231 h = 1;
232
233 sq_tex_resource_word0 = (1 << 0);
234 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
235 ((w - 1) << 19));
236
237 sq_tex_resource_word1 = (format << 26);
238 sq_tex_resource_word1 |= ((h - 1) << 0);
239
240 sq_tex_resource_word4 = ((1 << 14) |
241 (0 << 16) |
242 (1 << 19) |
243 (2 << 22) |
244 (3 << 25));
245
246 BEGIN_RING(9);
247 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
248 OUT_RING(0);
249 OUT_RING(sq_tex_resource_word0);
250 OUT_RING(sq_tex_resource_word1);
251 OUT_RING(gpu_addr >> 8);
252 OUT_RING(gpu_addr >> 8);
253 OUT_RING(sq_tex_resource_word4);
254 OUT_RING(0);
255 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
256 ADVANCE_RING();
257
258}
259
260static inline void
261set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
262{
263 RING_LOCALS;
264 DRM_DEBUG("\n");
265
266 BEGIN_RING(12);
267 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
268 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
269 OUT_RING((x1 << 0) | (y1 << 16));
270 OUT_RING((x2 << 0) | (y2 << 16));
271
272 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
273 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
274 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
275 OUT_RING((x2 << 0) | (y2 << 16));
276
277 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
278 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
279 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
280 OUT_RING((x2 << 0) | (y2 << 16));
281 ADVANCE_RING();
282}
283
284static inline void
285draw_auto(drm_radeon_private_t *dev_priv)
286{
287 RING_LOCALS;
288 DRM_DEBUG("\n");
289
290 BEGIN_RING(10);
291 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
292 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
293 OUT_RING(DI_PT_RECTLIST);
294
295 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
296 OUT_RING(DI_INDEX_SIZE_16_BIT);
297
298 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
299 OUT_RING(1);
300
301 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
302 OUT_RING(3);
303 OUT_RING(DI_SRC_SEL_AUTO_INDEX);
304
305 ADVANCE_RING();
306 COMMIT_RING();
307}
308
309static inline void
310set_default_state(drm_radeon_private_t *dev_priv)
311{
312 int default_state_dw, i;
313 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
314 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
315 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
316 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
317 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
318 RING_LOCALS;
319
320 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
321 case CHIP_R600:
322 num_ps_gprs = 192;
323 num_vs_gprs = 56;
324 num_temp_gprs = 4;
325 num_gs_gprs = 0;
326 num_es_gprs = 0;
327 num_ps_threads = 136;
328 num_vs_threads = 48;
329 num_gs_threads = 4;
330 num_es_threads = 4;
331 num_ps_stack_entries = 128;
332 num_vs_stack_entries = 128;
333 num_gs_stack_entries = 0;
334 num_es_stack_entries = 0;
335 break;
336 case CHIP_RV630:
337 case CHIP_RV635:
338 num_ps_gprs = 84;
339 num_vs_gprs = 36;
340 num_temp_gprs = 4;
341 num_gs_gprs = 0;
342 num_es_gprs = 0;
343 num_ps_threads = 144;
344 num_vs_threads = 40;
345 num_gs_threads = 4;
346 num_es_threads = 4;
347 num_ps_stack_entries = 40;
348 num_vs_stack_entries = 40;
349 num_gs_stack_entries = 32;
350 num_es_stack_entries = 16;
351 break;
352 case CHIP_RV610:
353 case CHIP_RV620:
354 case CHIP_RS780:
355 case CHIP_RS880:
356 default:
357 num_ps_gprs = 84;
358 num_vs_gprs = 36;
359 num_temp_gprs = 4;
360 num_gs_gprs = 0;
361 num_es_gprs = 0;
362 num_ps_threads = 136;
363 num_vs_threads = 48;
364 num_gs_threads = 4;
365 num_es_threads = 4;
366 num_ps_stack_entries = 40;
367 num_vs_stack_entries = 40;
368 num_gs_stack_entries = 32;
369 num_es_stack_entries = 16;
370 break;
371 case CHIP_RV670:
372 num_ps_gprs = 144;
373 num_vs_gprs = 40;
374 num_temp_gprs = 4;
375 num_gs_gprs = 0;
376 num_es_gprs = 0;
377 num_ps_threads = 136;
378 num_vs_threads = 48;
379 num_gs_threads = 4;
380 num_es_threads = 4;
381 num_ps_stack_entries = 40;
382 num_vs_stack_entries = 40;
383 num_gs_stack_entries = 32;
384 num_es_stack_entries = 16;
385 break;
386 case CHIP_RV770:
387 num_ps_gprs = 192;
388 num_vs_gprs = 56;
389 num_temp_gprs = 4;
390 num_gs_gprs = 0;
391 num_es_gprs = 0;
392 num_ps_threads = 188;
393 num_vs_threads = 60;
394 num_gs_threads = 0;
395 num_es_threads = 0;
396 num_ps_stack_entries = 256;
397 num_vs_stack_entries = 256;
398 num_gs_stack_entries = 0;
399 num_es_stack_entries = 0;
400 break;
401 case CHIP_RV730:
402 case CHIP_RV740:
403 num_ps_gprs = 84;
404 num_vs_gprs = 36;
405 num_temp_gprs = 4;
406 num_gs_gprs = 0;
407 num_es_gprs = 0;
408 num_ps_threads = 188;
409 num_vs_threads = 60;
410 num_gs_threads = 0;
411 num_es_threads = 0;
412 num_ps_stack_entries = 128;
413 num_vs_stack_entries = 128;
414 num_gs_stack_entries = 0;
415 num_es_stack_entries = 0;
416 break;
417 case CHIP_RV710:
418 num_ps_gprs = 192;
419 num_vs_gprs = 56;
420 num_temp_gprs = 4;
421 num_gs_gprs = 0;
422 num_es_gprs = 0;
423 num_ps_threads = 144;
424 num_vs_threads = 48;
425 num_gs_threads = 0;
426 num_es_threads = 0;
427 num_ps_stack_entries = 128;
428 num_vs_stack_entries = 128;
429 num_gs_stack_entries = 0;
430 num_es_stack_entries = 0;
431 break;
432 }
433
434 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
435 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
436 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
437 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
438 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
439 sq_config = 0;
440 else
441 sq_config = R600_VC_ENABLE;
442
443 sq_config |= (R600_DX9_CONSTS |
444 R600_ALU_INST_PREFER_VECTOR |
445 R600_PS_PRIO(0) |
446 R600_VS_PRIO(1) |
447 R600_GS_PRIO(2) |
448 R600_ES_PRIO(3));
449
450 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
451 R600_NUM_VS_GPRS(num_vs_gprs) |
452 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
453 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
454 R600_NUM_ES_GPRS(num_es_gprs));
455 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
456 R600_NUM_VS_THREADS(num_vs_threads) |
457 R600_NUM_GS_THREADS(num_gs_threads) |
458 R600_NUM_ES_THREADS(num_es_threads));
459 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
460 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
461 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
462 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
463
464 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
465 default_state_dw = r7xx_default_size * 4;
466 BEGIN_RING(default_state_dw + 10);
467 for (i = 0; i < default_state_dw; i++)
468 OUT_RING(r7xx_default_state[i]);
469 } else {
470 default_state_dw = r6xx_default_size * 4;
471 BEGIN_RING(default_state_dw + 10);
472 for (i = 0; i < default_state_dw; i++)
473 OUT_RING(r6xx_default_state[i]);
474 }
475 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
476 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
477 /* SQ config */
478 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
479 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
480 OUT_RING(sq_config);
481 OUT_RING(sq_gpr_resource_mgmt_1);
482 OUT_RING(sq_gpr_resource_mgmt_2);
483 OUT_RING(sq_thread_resource_mgmt);
484 OUT_RING(sq_stack_resource_mgmt_1);
485 OUT_RING(sq_stack_resource_mgmt_2);
486 ADVANCE_RING();
487}
488
/*
 * Convert an unsigned integer to the bit pattern of the equivalent IEEE 754
 * single-precision float, using integer arithmetic only.
 *
 * The result is exact for inputs up to 2^24; larger inputs are truncated
 * (rounded towards zero) to the 24 significant bits a float can hold.
 * The previous implementation silently masked the input to 14 bits, so for
 * example 16384 converted to 0.0; results for inputs below 2^14 are
 * unchanged.
 *
 * @input: value to convert
 *
 * Returns the raw 32-bit float encoding (0 for an input of 0).
 */
static inline uint32_t i2f(uint32_t input)
{
	uint32_t msb, exponent, fraction;

	if (input == 0)
		return 0;	/* 0 is a special case: all bits clear */

	/* locate the most significant set bit; it becomes the implicit 1 */
	msb = 31;
	while (!(input & ((uint32_t)1 << msb)))
		msb--;

	exponent = 127 + msb;	/* exponent biased by 127 */

	/* align the leading 1 to bit 23, dropping low bits if it is higher */
	if (msb > 23)
		fraction = input >> (msb - 23);
	else
		fraction = input << (23 - msb);

	/* mask off the top bit; it is assumed to be 1 */
	return (exponent << 23) | (fraction & 0x7fffff);
}
513
514
515int r600_nomm_get_vb(struct drm_device *dev)
516{
517 drm_radeon_private_t *dev_priv = dev->dev_private;
518 dev_priv->blit_vb = radeon_freelist_get(dev);
519 if (!dev_priv->blit_vb) {
520 DRM_ERROR("Unable to allocate vertex buffer for blit\n");
521 return -EAGAIN;
522 }
523 return 0;
524}
525
526void r600_nomm_put_vb(struct drm_device *dev)
527{
528 drm_radeon_private_t *dev_priv = dev->dev_private;
529
530 dev_priv->blit_vb->used = 0;
531 radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
532}
533
534void *r600_nomm_get_vb_ptr(struct drm_device *dev)
535{
536 drm_radeon_private_t *dev_priv = dev->dev_private;
537 return (((char *)dev->agp_buffer_map->handle +
538 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
539}
540
541int
542r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
543{
544 drm_radeon_private_t *dev_priv = dev->dev_private;
545 DRM_DEBUG("\n");
546
547 r600_nomm_get_vb(dev);
548
549 dev_priv->blit_vb->file_priv = file_priv;
550
551 set_default_state(dev_priv);
552 set_shaders(dev);
553
554 return 0;
555}
556
557
558void
559r600_done_blit_copy(struct drm_device *dev)
560{
561 drm_radeon_private_t *dev_priv = dev->dev_private;
562 RING_LOCALS;
563 DRM_DEBUG("\n");
564
565 BEGIN_RING(5);
566 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
567 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
568 /* wait for 3D idle clean */
569 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
570 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
571 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
572
573 ADVANCE_RING();
574 COMMIT_RING();
575
576 r600_nomm_put_vb(dev);
577}
578
579void
580r600_blit_copy(struct drm_device *dev,
581 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
582 int size_bytes)
583{
584 drm_radeon_private_t *dev_priv = dev->dev_private;
585 int max_bytes;
586 u64 vb_addr;
587 u32 *vb;
588
589 vb = r600_nomm_get_vb_ptr(dev);
590
591 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
592 max_bytes = 8192;
593
594 while (size_bytes) {
595 int cur_size = size_bytes;
596 int src_x = src_gpu_addr & 255;
597 int dst_x = dst_gpu_addr & 255;
598 int h = 1;
599 src_gpu_addr = src_gpu_addr & ~255;
600 dst_gpu_addr = dst_gpu_addr & ~255;
601
602 if (!src_x && !dst_x) {
603 h = (cur_size / max_bytes);
604 if (h > 8192)
605 h = 8192;
606 if (h == 0)
607 h = 1;
608 else
609 cur_size = max_bytes;
610 } else {
611 if (cur_size > max_bytes)
612 cur_size = max_bytes;
613 if (cur_size > (max_bytes - dst_x))
614 cur_size = (max_bytes - dst_x);
615 if (cur_size > (max_bytes - src_x))
616 cur_size = (max_bytes - src_x);
617 }
618
619 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
620
621 r600_nomm_put_vb(dev);
622 r600_nomm_get_vb(dev);
623 if (!dev_priv->blit_vb)
624 return;
625 set_shaders(dev);
626 vb = r600_nomm_get_vb_ptr(dev);
627 }
628
629 vb[0] = i2f(dst_x);
630 vb[1] = 0;
631 vb[2] = i2f(src_x);
632 vb[3] = 0;
633
634 vb[4] = i2f(dst_x);
635 vb[5] = i2f(h);
636 vb[6] = i2f(src_x);
637 vb[7] = i2f(h);
638
639 vb[8] = i2f(dst_x + cur_size);
640 vb[9] = i2f(h);
641 vb[10] = i2f(src_x + cur_size);
642 vb[11] = i2f(h);
643
644 /* src */
645 set_tex_resource(dev_priv, FMT_8,
646 src_x + cur_size, h, src_x + cur_size,
647 src_gpu_addr);
648
649 cp_set_surface_sync(dev_priv,
650 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
651
652 /* dst */
653 set_render_target(dev_priv, COLOR_8,
654 dst_x + cur_size, h,
655 dst_gpu_addr);
656
657 /* scissors */
658 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
659
660 /* Vertex buffer setup */
661 vb_addr = dev_priv->gart_buffers_offset +
662 dev_priv->blit_vb->offset +
663 dev_priv->blit_vb->used;
664 set_vtx_resource(dev_priv, vb_addr);
665
666 /* draw */
667 draw_auto(dev_priv);
668
669 cp_set_surface_sync(dev_priv,
670 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
671 cur_size * h, dst_gpu_addr);
672
673 vb += 12;
674 dev_priv->blit_vb->used += 12 * 4;
675
676 src_gpu_addr += cur_size * h;
677 dst_gpu_addr += cur_size * h;
678 size_bytes -= cur_size * h;
679 }
680 } else {
681 max_bytes = 8192 * 4;
682
683 while (size_bytes) {
684 int cur_size = size_bytes;
685 int src_x = (src_gpu_addr & 255);
686 int dst_x = (dst_gpu_addr & 255);
687 int h = 1;
688 src_gpu_addr = src_gpu_addr & ~255;
689 dst_gpu_addr = dst_gpu_addr & ~255;
690
691 if (!src_x && !dst_x) {
692 h = (cur_size / max_bytes);
693 if (h > 8192)
694 h = 8192;
695 if (h == 0)
696 h = 1;
697 else
698 cur_size = max_bytes;
699 } else {
700 if (cur_size > max_bytes)
701 cur_size = max_bytes;
702 if (cur_size > (max_bytes - dst_x))
703 cur_size = (max_bytes - dst_x);
704 if (cur_size > (max_bytes - src_x))
705 cur_size = (max_bytes - src_x);
706 }
707
708 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
709 r600_nomm_put_vb(dev);
710 r600_nomm_get_vb(dev);
711 if (!dev_priv->blit_vb)
712 return;
713
714 set_shaders(dev);
715 vb = r600_nomm_get_vb_ptr(dev);
716 }
717
718 vb[0] = i2f(dst_x / 4);
719 vb[1] = 0;
720 vb[2] = i2f(src_x / 4);
721 vb[3] = 0;
722
723 vb[4] = i2f(dst_x / 4);
724 vb[5] = i2f(h);
725 vb[6] = i2f(src_x / 4);
726 vb[7] = i2f(h);
727
728 vb[8] = i2f((dst_x + cur_size) / 4);
729 vb[9] = i2f(h);
730 vb[10] = i2f((src_x + cur_size) / 4);
731 vb[11] = i2f(h);
732
733 /* src */
734 set_tex_resource(dev_priv, FMT_8_8_8_8,
735 (src_x + cur_size) / 4,
736 h, (src_x + cur_size) / 4,
737 src_gpu_addr);
738
739 cp_set_surface_sync(dev_priv,
740 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
741
742 /* dst */
743 set_render_target(dev_priv, COLOR_8_8_8_8,
744 dst_x + cur_size, h,
745 dst_gpu_addr);
746
747 /* scissors */
748 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
749
750 /* Vertex buffer setup */
751 vb_addr = dev_priv->gart_buffers_offset +
752 dev_priv->blit_vb->offset +
753 dev_priv->blit_vb->used;
754 set_vtx_resource(dev_priv, vb_addr);
755
756 /* draw */
757 draw_auto(dev_priv);
758
759 cp_set_surface_sync(dev_priv,
760 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
761 cur_size * h, dst_gpu_addr);
762
763 vb += 12;
764 dev_priv->blit_vb->used += 12 * 4;
765
766 src_gpu_addr += cur_size * h;
767 dst_gpu_addr += cur_size * h;
768 size_bytes -= cur_size * h;
769 }
770 }
771}
772
773void
774r600_blit_swap(struct drm_device *dev,
775 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
776 int sx, int sy, int dx, int dy,
777 int w, int h, int src_pitch, int dst_pitch, int cpp)
778{
779 drm_radeon_private_t *dev_priv = dev->dev_private;
780 int cb_format, tex_format;
781 u64 vb_addr;
782 u32 *vb;
783
784 vb = (u32 *) ((char *)dev->agp_buffer_map->handle +
785 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
786
787 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
788
789 r600_nomm_put_vb(dev);
790 r600_nomm_get_vb(dev);
791 if (!dev_priv->blit_vb)
792 return;
793
794 set_shaders(dev);
795 vb = r600_nomm_get_vb_ptr(dev);
796 }
797
798 if (cpp == 4) {
799 cb_format = COLOR_8_8_8_8;
800 tex_format = FMT_8_8_8_8;
801 } else if (cpp == 2) {
802 cb_format = COLOR_5_6_5;
803 tex_format = FMT_5_6_5;
804 } else {
805 cb_format = COLOR_8;
806 tex_format = FMT_8;
807 }
808
809 vb[0] = i2f(dx);
810 vb[1] = i2f(dy);
811 vb[2] = i2f(sx);
812 vb[3] = i2f(sy);
813
814 vb[4] = i2f(dx);
815 vb[5] = i2f(dy + h);
816 vb[6] = i2f(sx);
817 vb[7] = i2f(sy + h);
818
819 vb[8] = i2f(dx + w);
820 vb[9] = i2f(dy + h);
821 vb[10] = i2f(sx + w);
822 vb[11] = i2f(sy + h);
823
824 /* src */
825 set_tex_resource(dev_priv, tex_format,
826 src_pitch / cpp,
827 sy + h, src_pitch / cpp,
828 src_gpu_addr);
829
830 cp_set_surface_sync(dev_priv,
831 R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr);
832
833 /* dst */
834 set_render_target(dev_priv, cb_format,
835 dst_pitch / cpp, dy + h,
836 dst_gpu_addr);
837
838 /* scissors */
839 set_scissors(dev_priv, dx, dy, dx + w, dy + h);
840
841 /* Vertex buffer setup */
842 vb_addr = dev_priv->gart_buffers_offset +
843 dev_priv->blit_vb->offset +
844 dev_priv->blit_vb->used;
845 set_vtx_resource(dev_priv, vb_addr);
846
847 /* draw */
848 draw_auto(dev_priv);
849
850 cp_set_surface_sync(dev_priv,
851 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
852 dst_pitch * (dy + h), dst_gpu_addr);
853
854 dev_priv->blit_vb->used += 12 * 4;
855}